# Point reticulate at the Anaconda Python 3 interpreter used from RStudio,
# then print the Python configuration that was resolved.
reticulate::use_python(
  python = "/Users/Mezhoud/anaconda3/bin/python3",
  required = TRUE
)
reticulate::py_config()
## python:         /Users/Mezhoud/anaconda3/bin/python3
## libpython:      /Users/Mezhoud/anaconda3/lib/libpython3.7m.dylib
## pythonhome:     /Users/Mezhoud/anaconda3:/Users/Mezhoud/anaconda3
## version:        3.7.5 (default, Oct 25 2019, 10:52:18)  [Clang 4.0.1 (tags/RELEASE_401/final)]
## numpy:          /Users/Mezhoud/anaconda3/lib/python3.7/site-packages/numpy
## numpy_version:  1.18.1
## 
## NOTE: Python version was forced by use_python function
# Load the training table, give the coordinate/elevation columns readable
# names, and split Square_ID on "-": the first segment stays in Square_ID and
# the remaining four segments are glued back together into `other`.
# The renamed/derived columns are moved to the front; all others follow.
Train <- fread("Train.csv") %>%
  rename(
    `Altitude (m)` = elevation,
    Longitude = X,
    Latitude = Y
  ) %>%
  separate(
    Square_ID,
    into = c("Square_ID", "v1", "v2", "v3", "v4"),
    sep = "-",
    remove = FALSE
  ) %>%
  unite("other", v1, v2, v3, v4, sep = "-") %>%
  select(
    Longitude, Latitude, `Altitude (m)`, LC_Type1_mode, Square_ID, other,
    everything()
  )
# (disabled) filter(row_number() == 1) would keep only the first row.
head(Train)
##    Longitude Latitude Altitude (m) LC_Type1_mode Square_ID
## 1:     34.26   -15.91     887.7642             9  4e3c3896
## 2:     34.26   -15.90     743.4039             9  4e3c3897
## 3:     34.26   -15.89     565.7283             9  4e3c3898
## 4:     34.26   -15.88     443.3928            10  4e3c3899
## 5:     34.26   -15.87     437.4434            10  4e3c389a
## 6:     34.26   -15.86     405.6317            10  4e3c389b
##                          other target_2015 precip 2014-11-16 - 2014-11-23
## 1: 14ce-11ea-bce5-f49634744a41           0                              0
## 2: 14ce-11ea-bce5-f49634744a41           0                              0
## 3: 14ce-11ea-bce5-f49634744a41           0                              0
## 4: 14ce-11ea-bce5-f49634744a41           0                              0
## 5: 14ce-11ea-bce5-f49634744a41           0                              0
## 6: 14ce-11ea-bce5-f49634744a41           0                              0
##    precip 2014-11-23 - 2014-11-30 precip 2014-11-30 - 2014-12-07
## 1:                              0                              0
## 2:                              0                              0
## 3:                              0                              0
## 4:                              0                              0
## 5:                              0                              0
## 6:                              0                              0
##    precip 2014-12-07 - 2014-12-14 precip 2014-12-14 - 2014-12-21
## 1:                       14.84403                       14.55282
## 2:                       14.84403                       14.55282
## 3:                       14.84403                       14.55282
## 4:                       14.84403                       14.55282
## 5:                       14.84403                       14.55282
## 6:                       14.84403                       14.55282
##    precip 2014-12-21 - 2014-12-28 precip 2014-12-28 - 2015-01-04
## 1:                       12.23777                       57.45136
## 2:                       12.23777                       57.45136
## 3:                       12.23777                       57.45136
## 4:                       12.23777                       57.45136
## 5:                       12.23777                       57.45136
## 6:                       12.23777                       57.45136
##    precip 2015-01-04 - 2015-01-11 precip 2015-01-11 - 2015-01-18
## 1:                       30.12705                       30.44947
## 2:                       30.12705                       30.44947
## 3:                       30.12705                       30.44947
## 4:                       30.12705                       30.44947
## 5:                       30.12705                       30.44947
## 6:                       30.12705                       30.44947
##    precip 2015-01-18 - 2015-01-25 precip 2015-01-25 - 2015-02-01
## 1:                       1.521829                          29.39
## 2:                       1.521829                          29.39
## 3:                       1.521829                          29.39
## 4:                       1.521829                          29.39
## 5:                       1.521829                          29.39
## 6:                       1.521829                          29.39
##    precip 2015-02-01 - 2015-02-08 precip 2015-02-08 - 2015-02-15
## 1:                       32.87832                       8.179804
## 2:                       32.87832                       8.179804
## 3:                       32.87832                       8.179804
## 4:                       32.87832                       8.179804
## 5:                       32.87832                       8.179804
## 6:                       32.87832                       8.179804
##    precip 2015-02-15 - 2015-02-22 precip 2015-02-22 - 2015-03-01
## 1:                      0.9639814                        16.6591
## 2:                      0.9639814                        16.6591
## 3:                      0.9639814                        16.6591
## 4:                      0.9639814                        16.6591
## 5:                      0.9639814                        16.6591
## 6:                      0.9639814                        16.6591
##    precip 2015-03-01 - 2015-03-08 precip 2015-03-08 - 2015-03-15
## 1:                       3.304466                              0
## 2:                       3.304466                              0
## 3:                       3.304466                              0
## 4:                       3.304466                              0
## 5:                       3.304466                              0
## 6:                       3.304466                              0
##    precip 2019-01-20 - 2019-01-27 precip 2019-01-27 - 2019-02-03
## 1:                       12.99262                       4.582856
## 2:                       12.99262                       4.582856
## 3:                       12.99262                       4.582856
## 4:                       12.99262                       4.582856
## 5:                       12.99262                       4.582856
## 6:                       12.99262                       4.582856
##    precip 2019-02-03 - 2019-02-10 precip 2019-02-10 - 2019-02-17
## 1:                       35.03753                       4.796012
## 2:                       35.03753                       4.796012
## 3:                       35.03753                       4.796012
## 4:                       35.03753                       4.796012
## 5:                       35.03753                       4.796012
## 6:                       35.03753                       4.796012
##    precip 2019-02-17 - 2019-02-24 precip 2019-02-24 - 2019-03-03
## 1:                       28.08331                              0
## 2:                       28.08331                              0
## 3:                       28.08331                              0
## 4:                       28.08331                              0
## 5:                       28.08331                              0
## 6:                       28.08331                              0
##    precip 2019-03-03 - 2019-03-10 precip 2019-03-10 - 2019-03-17
## 1:                       58.36246                       18.26469
## 2:                       58.36246                       18.26469
## 3:                       58.36246                       18.26469
## 4:                       58.36246                       18.26469
## 5:                       58.36246                       18.26469
## 6:                       58.36246                       18.26469
##    precip 2019-03-17 - 2019-03-24 precip 2019-03-24 - 2019-03-31
## 1:                       17.53749                      0.8963228
## 2:                       17.53749                      0.8963228
## 3:                       17.53749                      0.8963228
## 4:                       17.53749                      0.8963228
## 5:                       17.53749                      0.8963228
## 6:                       17.53749                      0.8963228
##    precip 2019-03-31 - 2019-04-07 precip 2019-04-07 - 2019-04-14
## 1:                           1.68                              0
## 2:                           1.68                              0
## 3:                           1.68                              0
## 4:                           1.68                              0
## 5:                           1.68                              0
## 6:                           1.68                              0
##    precip 2019-04-14 - 2019-04-21 precip 2019-04-21 - 2019-04-28
## 1:                              0                              0
## 2:                              0                              0
## 3:                              0                              0
## 4:                              0                              0
## 5:                              0                              0
## 6:                              0                              0
##    precip 2019-04-28 - 2019-05-05 precip 2019-05-05 - 2019-05-12
## 1:                              0                              0
## 2:                              0                              0
## 3:                              0                              0
## 4:                              0                              0
## 5:                              0                              0
## 6:                              0                              0
##    precip 2019-05-12 - 2019-05-19
## 1:                              0
## 2:                              0
## 3:                              0
## 4:                              0
## 5:                              0
## 6:                              0

0.1 Delimit the study area

# Bounding box of the study area, taken from the coordinate extremes in Train.
lng1 <- min(Train[["Longitude"]])
lng2 <- max(Train[["Longitude"]])

lat1 <- min(Train[["Latitude"]])
lat2 <- max(Train[["Latitude"]])

# Interactive map centred on the middle of the bounding box, with the box
# itself drawn as an unfilled rectangle.
leaflet(data = Train) %>%
  addTiles() %>%
  setView(
    lng = (lng1 + lng2) / 2,
    lat = (lat1 + lat2) / 2,
    zoom = 8.4
  ) %>%
  addRectangles(
    lng1 = lng1, lat1 = lat1,
    lng2 = lng2, lat2 = lat2,
    fillColor = "transparent"
  )
  # Disabled alternatives kept for reference:
  #addProviderTiles("providers$Esri.NatGeoWorldMap") %>%    # "CartoDB.Positron"
  #addMarkers(~X, ~Y,
  #           popup = ~LC_Type1_mode, label = ~Square_ID,
  #           clusterOptions = markerClusterOptions())

1 A glimpse at the Hydrology, Elevation, Land Cover, and Population maps

# Read the four reference map images from PNG files.
Hydrology <- EBImage::readImage("Hydrology_bib.png")
Elevation <- EBImage::readImage("elevation_bib.png")
Landcover <- EBImage::readImage("LandCover_bib.png")
Population <- EBImage::readImage("Population_bib.png")

# Show them on a 2 x 2 grid; each image is plotted and then titled, in the
# order Elevation, Landcover, Hydrology, Population.
par(mfrow = c(2, 2))
invisible(Map(
  function(img, label) {
    plot(img)
    title(label)
  },
  list(Elevation, Landcover, Hydrology, Population),
  c("Elevation", "Landscovers", "Hydrology", "Population")
))

1.1 A glimpse at the altitude (elevation) and soil type in the region, using the train dataset

1.1.1 Soil Description

  • The Color palette and the description of the Soil Name are loaded from this link.
# Land-cover lookup table: one row per LC_Type1_mode value with its hex colour.
# The raw Description ("<name>: <details>") is split at ":" into a short
# `Soil Name` (stored as a factor) and the remaining description text.
# For reference, the palette in table order is:
#LC_type1_palette <- c("#05450a", "#086a10", "#54a708", "#78d203", "#009900", "#c6b044", "#dcd159", "#dade48", "#fbff13", "#b6ff05", "#27ff87", "#c24f44", "#a5a5a5", "#ff6d4c", "#69fff8", "#f9ffa4", "#1c0dff")
Soil_pal <- fread("LC_Type1_mode.csv") %>%
  rename(LC_Type1_mode = Value) %>%
  separate(
    Description,
    into = c("Soil Name", "Description"),
    sep = ":",
    remove = FALSE
  ) %>%
  mutate(`Soil Name` = factor(`Soil Name`))

print(Soil_pal)
##    LC_Type1_mode   Color                           Soil Name
## 1              1 #05450a        Evergreen Needleleaf Forests
## 2              2 #086a10         Evergreen Broadleaf Forests
## 3              3 #54a708        Deciduous Needleleaf Forests
## 4              4 #78d203         Deciduous Broadleaf Forests
## 5              5 #009900                       Mixed Forests
## 6              6 #c6b044                   Closed Shrublands
## 7              7 #dcd159                     Open Shrublands
## 8              8 #dade48                      Woody Savannas
## 9              9 #fbff13                            Savannas
## 10            10 #b6ff05                          Grasslands
## 11            11 #27ff87                  Permanent Wetlands
## 12            12 #c24f44                           Croplands
## 13            13 #a5a5a5            Urban and Built-up Lands
## 14            14 #ff6d4c Cropland/Natural Vegetation Mosaics
## 15            15 #69fff8              Permanent Snow and Ice
## 16            16 #f9ffa4                              Barren
## 17            17 #1c0dff                        Water Bodies
##                                                                                                Description
## 1                                      dominated by evergreen conifer trees (canopy >2m). Tree cover >60%.
## 2                        dominated by evergreen broadleaf and palmate trees (canopy >2m). Tree cover >60%.
## 3                           dominated by deciduous needleleaf (larch) trees (canopy >2m). Tree cover >60%.
## 4                                    dominated by deciduous broadleaf trees (canopy >2m). Tree cover >60%.
## 5   dominated by neither deciduous nor evergreen (40-60% of each) tree type (canopy >2m). Tree cover >60%.
## 6                                                  dominated by woody perennials (1-2m height) >60% cover.
## 7                                                dominated by woody perennials (1-2m height) 10-60% cover.
## 8                                                                          tree cover 30-60% (canopy >2m).
## 9                                                                          tree cover 10-30% (canopy >2m).
## 10                                                                  dominated by herbaceous annuals (<2m).
## 11                           permanently inundated lands with 30-60% water cover and >10% vegetated cover.
## 12                                                            at least 60% of area is cultivated cropland.
## 13                at least 30% impervious surface area including building materials, asphalt and vehicles.
## 14           mosaics of small-scale cultivation 40-60% with natural tree, shrub, or herbaceous vegetation.
## 15                     at least 60% of area is covered by snow and ice for at least 10 months of the year.
## 16    at least 60% of area is non-vegetated barren (sand, rock, soil) areas with less than 10% vegetation.
## 17                                              at least 60% of area is covered by permanent water bodies.
## Color palette of Land Cover Type 1 (LC_Type1_mode)
# Attach with library(): unlike require(), it stops with an error when the
# package is missing instead of returning FALSE and failing later.
library(scales)
library(RColorBrewer)

# Preview the land-cover palette as a grid of colour swatches.
scales::show_col(Soil_pal$Color)

# Named colour vector (soil name -> hex colour). `Soil Name` is a factor, so
# it is converted to character explicitly before being used as names.
colors <- setNames(Soil_pal$Color, as.character(Soil_pal$`Soil Name`))

# Manual colour scale keyed by soil name, so a given soil class gets the same
# colour in every plot that adds `colScale`.
colScale <- scale_colour_manual(name = "Soil Name", values = colors)

as.data.frame(colors)
##                                      colors
## Evergreen Needleleaf Forests        #05450a
## Evergreen Broadleaf Forests         #086a10
## Deciduous Needleleaf Forests        #54a708
## Deciduous Broadleaf Forests         #78d203
## Mixed Forests                       #009900
## Closed Shrublands                   #c6b044
## Open Shrublands                     #dcd159
## Woody Savannas                      #dade48
## Savannas                            #fbff13
## Grasslands                          #b6ff05
## Permanent Wetlands                  #27ff87
## Croplands                           #c24f44
## Urban and Built-up Lands            #a5a5a5
## Cropland/Natural Vegetation Mosaics #ff6d4c
## Permanent Snow and Ice              #69fff8
## Barren                              #f9ffa4
## Water Bodies                        #1c0dff
# Build one outlined, unfilled rectangle layer from fixed corner coordinates.
# annotate() is used rather than geom_rect(aes(...)): with constant
# aesthetics geom_rect() draws a copy of the rectangle for every data row,
# whereas annotate() draws it once and does not inherit plot aesthetics.
urban_zone_box <- function(xmin, xmax, ymin, ymax) {
  annotate(
    "rect",
    xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax,
    fill = "transparent", colour = "black", size = 0.5
  )
}

# The five boxes marking the urban zones (longitude/latitude corners).
urban_zone_boxes <- list(
  urban_zone_box(34.95, 35.1, -15.71, -15.87),
  urban_zone_box(35.05, 35.15, -16.52, -16.64),
  urban_zone_box(34.8, 34.88, -15.98, -16.29),
  urban_zone_box(35.39, 35.41, -15.77, -15.85),
  urban_zone_box(35.38, 35.4, -15.24, -15.3)
)

# Altitude map: one point per Square_ID, coloured by altitude.
p1 <- Train %>%
  distinct(Square_ID, .keep_all = TRUE) %>%
  ggplot() +
  aes(x = Longitude, y = Latitude, colour = `Altitude (m)`) +
  geom_point(size = 5) +
  scale_colour_gradientn(colours = terrain.colors(10))

# Same map with the urban-zone boxes overlaid.
p1bis <- p1 +
  urban_zone_boxes +
  ggtitle("Area Altitude distibution and Urban zone localisation")

# Soil map: one point per Square_ID, coloured by soil name via the shared
# manual colour scale, with the same urban-zone boxes.
p2 <- Train %>%
  distinct(Square_ID, .keep_all = TRUE) %>%
  left_join(Soil_pal, by = "LC_Type1_mode") %>%
  #filter(LC_Type1_mode == 13) %>%
  ggplot() +
  aes(x = Longitude, y = Latitude, colour = `Soil Name`) +
  geom_point(size = 2) +
  colScale +
  urban_zone_boxes +
  ggtitle("Soil Name distibution and Urban zone localisation")

# Altitude map on the left, soil map on the right.
gridExtra::grid.arrange(p1bis, p2, ncol = 2)

  • We obtain plots similar to the Elevation and Land Cover maps from the bibliography.

  • The left plot shows that the dark green area in the south of Malawi has the lowest altitude.

  • The right plot shows the Soil Name distribution. The urban zones are indicated in grey. The largest urban zone is in the center of the region, located on Croplands soil and at a relatively high altitude (more than 1000 m (yellow)). The four other urban zones are smaller and are located in the green area, which may be more exposed to flooding.

1.2 Omit text and axis from ggplot

# Theme that strips every non-data element (border, grid, axes, titles,
# legend) so that only the plotted points remain.
omit <- theme_bw() +
  theme(
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.line = element_blank(),
    axis.text.x = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks.x = element_blank(),
    axis.ticks.y = element_blank(),
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    legend.title = element_blank(),
    legend.position = "none",
    plot.title = element_blank()
  )

# Bare altitude map (dark blue = low, white = high) with all text and axes
# removed, intended as input for image segmentation if needed.
soil <- ggplot(Train) +
  aes(x = Longitude, y = Latitude, colour = `Altitude (m)`) +
  geom_point(size = 6) +
  scale_colour_gradient(low = "darkblue", high = "white") +
  omit

# Write the image to disk; invisible() hides ggsave()'s return value.
invisible(ggsave("Soil.jpeg", plot = soil, dpi = 300))
## Saving 14 x 8 in image

1.3 Preprocessing Train data

# Reshape Train to long format: one row per square and weekly rainfall column.
# The rainfall columns are selected by their "precip" name prefix rather than
# by position (previously 8:ncol(Train)), so the reshape no longer depends on
# the exact number or order of the leading columns.
# gather() is kept on purpose: it stacks column by column, which preserves the
# row order of the original pipeline.
new_train <- Train %>%
  gather(key = `Week of`, value = Pluviometry, starts_with("precip")) %>%
  # Keep the first date found in the column name and parse it as a Date.
  mutate(`Week of` = str_extract(`Week of`, "\\d{4}-\\d{2}-\\d{2}")) %>%
  mutate(`Week of` = as.Date(`Week of`, format = "%Y-%m-%d")) %>%
  mutate(Week = lubridate::week(`Week of`)) %>%
  # Weeks starting on or before 2015-03-15 are labelled 2015 (this includes
  # the late-2014 weeks); all later weeks are labelled 2019.
  mutate(Year = if_else(`Week of` <= as.Date("2015-03-15"), 2015, 2019)) %>%
  # Running rainfall total per square within each labelled year.
  group_by(Square_ID, Year) %>%
  mutate(cum_Pluvio = cumsum(Pluviometry)) %>%
  ungroup() %>%
  # The observed target only exists for 2015; 2019 rows get 0.
  mutate(Target = if_else(Year == 2015, target_2015, 0)) %>%
  left_join(Soil_pal, by = "LC_Type1_mode") %>%
  rename(Soil_type = LC_Type1_mode) %>%
  # Bin the target into five labelled ranges; everything <= 0 is "0".
  mutate(`Target Range` = cut(
    Target,
    breaks = c(-Inf, 0, 0.25, 0.5, 0.75, 1),
    labels = c("0", "Low", "Middle", "High", "Sure")
  )) %>%
  # Bin the altitude into labelled height levels.
  mutate(Height = cut(
    `Altitude (m)`,
    breaks = c(44, 100, 400, 650, 750, 1000, 2300, 2804),
    labels = c("R1", "L1", "L2", "L3", "L4", "L5", "L6")
  )) %>%
  select(-target_2015)

new_train %>% head()
## # A tibble: 6 x 17
##   Longitude Latitude `Altitude (m)` Soil_type Square_ID other `Week of` 
##       <dbl>    <dbl>          <dbl>     <int> <chr>     <chr> <date>    
## 1      34.3    -15.9           888.         9 4e3c3896  14ce… 2014-11-16
## 2      34.3    -15.9           743.         9 4e3c3897  14ce… 2014-11-16
## 3      34.3    -15.9           566.         9 4e3c3898  14ce… 2014-11-16
## 4      34.3    -15.9           443.        10 4e3c3899  14ce… 2014-11-16
## 5      34.3    -15.9           437.        10 4e3c389a  14ce… 2014-11-16
## 6      34.3    -15.9           406.        10 4e3c389b  14ce… 2014-11-16
## # … with 10 more variables: Pluviometry <dbl>, Week <dbl>, Year <dbl>,
## #   cum_Pluvio <dbl>, Target <dbl>, Color <chr>, `Soil Name` <fct>,
## #   Description <chr>, `Target Range` <fct>, Height <fct>

1.4 Give Water Bodies, Permanent Wetlands and river areas their own classes, even when they have the same altitude as other soil types

# Re-bin the altitude with finer breaks (adding River1/River2 bands), then
# override the class for specific cases. The steps run in order inside one
# mutate(), so each one sees the result of the previous one:
#   1. altitude band from cut(), converted to character
#   2. "Water" where the soil is Water Bodies
#   3. "Wetlands" where the soil is Permanent Wetlands
#   4. "River3" where the band is still "L2" and Target >= 0.7
new_train <- new_train %>%
  mutate(
    Height = as.character(cut(
      `Altitude (m)`,
      breaks = c(44, 100, 465, 480, 700, 715, 750, 1000, 2300, 2804),
      labels = c("River1", "L1", "River2", "L2", "L3", "L4", "L5", "L6", "L7")
    )),
    Height = if_else(`Soil Name` == "Water Bodies", "Water", Height),
    Height = if_else(`Soil Name` == "Permanent Wetlands", "Wetlands", Height),
    Height = if_else(Height == "L2" & Target >= 0.7, "River3", Height)
  )

## Squares classified as River3 in 2015
SquareID_River3 <- new_train %>%
  filter(Year == 2015, Height == "River3") %>%
  distinct(Square_ID)

## Carry River3 over to the 2019 rows of those squares that are still "L2"
new_train <- new_train %>%
  mutate(
    Height = if_else(
      Year == 2019 & Height == "L2" & Square_ID %in% SquareID_River3$Square_ID,
      "River3",
      Height
    )
  )

1.5 Visualize Pluviometry

# Weekly rainfall (blue), cumulative rainfall (red) and a GAM smooth of the
# cumulative rainfall (black), one panel per labelled year.
new_train %>%
  ggplot(aes(x = `Week of`)) +
  geom_line(aes(y = Pluviometry), color = "blue") +
  geom_line(aes(y = cum_Pluvio), color = "red") +
  geom_smooth(
    aes(y = cum_Pluvio),
    method = "gam",
    formula = y ~ s(x, bs = "cs"),
    color = "black"
  ) +
  ylim(0, 400) +
  facet_wrap(~ Year, ncol = 2, scales = "free")

1.6 Which soil type is flooded the most?

# For 2015 rows: log of the row count per (soil type, height class, soil name,
# risk flag), drawn as bars per height+soil combination and split into
# "No Risk" / "With Risk" panels.
new_train %>%
  filter(Year == 2015) %>%
  mutate(
    wrap_mode = factor(if_else(Target == 0, "No Risk 2015", "With Risk 2015"))
  ) %>%
  group_by(Soil_type, Height, `Soil Name`, wrap_mode) %>%
  summarise(Frequency = log(n())) %>%
  ungroup() %>%
  mutate(Soil_Height = factor(paste0(Height, Soil_type))) %>%
  ggplot(aes(x = Soil_Height, y = Frequency, fill = `Soil Name`)) +
  geom_col() +
  facet_grid(wrap_mode ~ .) +
  theme(legend.position = "bottom")

  #scale_fill_viridis_d(option = "plasma")

1.7 Weight for Soil/Height/Flood

# Weight of each risk flag within a height+soil combination:
#   Frequency         = log(row count) per (soil type, height, soil name, flag)
#   SoilHeight_Weight = Frequency / sum of Frequency over the same
#                       height+soil combination
# All years are used here (the Year == 2015 filter is disabled).
SoilHeight_weight <- new_train %>%
  #filter(Year == 2015) %>%
  mutate(
    wrap_mode = factor(if_else(Target == 0, "No Risk 2015", "With Risk 2015"))
  ) %>%
  group_by(Soil_type, Height, `Soil Name`, wrap_mode) %>%
  summarise(Frequency = log(n())) %>%
  ungroup() %>%
  mutate(Soil_Height = factor(paste0(Height, Soil_type))) %>%
  group_by(Soil_Height) %>%
  mutate(SoilHeight_Weight = Frequency / sum(Frequency)) %>%
  ungroup() %>%
  select(Height, Soil_type, SoilHeight_Weight, wrap_mode) %>%
  arrange(Height)

# Attach the weight to every row through its height, soil type and risk flag.
new_train <- new_train %>%
  mutate(
    wrap_mode = factor(if_else(Target == 0, "No Risk 2015", "With Risk 2015"))
  ) %>%
  left_join(SoilHeight_weight, by = c("Height", "Soil_type", "wrap_mode"))

head(new_train)
## # A tibble: 6 x 19
##   Longitude Latitude `Altitude (m)` Soil_type Square_ID other `Week of` 
##       <dbl>    <dbl>          <dbl>     <int> <chr>     <chr> <date>    
## 1      34.3    -15.9           888.         9 4e3c3896  14ce… 2014-11-16
## 2      34.3    -15.9           743.         9 4e3c3897  14ce… 2014-11-16
## 3      34.3    -15.9           566.         9 4e3c3898  14ce… 2014-11-16
## 4      34.3    -15.9           443.        10 4e3c3899  14ce… 2014-11-16
## 5      34.3    -15.9           437.        10 4e3c389a  14ce… 2014-11-16
## 6      34.3    -15.9           406.        10 4e3c389b  14ce… 2014-11-16
## # … with 12 more variables: Pluviometry <dbl>, Week <dbl>, Year <dbl>,
## #   cum_Pluvio <dbl>, Target <dbl>, Color <chr>, `Soil Name` <fct>,
## #   Description <chr>, `Target Range` <fct>, Height <chr>, wrap_mode <fct>,
## #   SoilHeight_Weight <dbl>

1.8 Categorize elevation to set slopes and define how water moves in the environment

# Map of height classes (one point per square, 2015 rows) with the rows that
# have Target > 0 drawn on top as small points coloured by Target Range.
# Both layers share one manual colour scale; `values` and `labels` are given
# positionally, so their order must stay aligned with each other.
new_train %>%
  filter(Year == 2015) %>%
  distinct(Square_ID, .keep_all = TRUE) %>%
  ggplot() +
  geom_point(aes(x = Longitude, y = Latitude, color = Height)) +
  geom_point(
    data = subset(new_train, Target > 0),
    aes(
      x = Longitude, y = Latitude,
      colour = `Target Range`, shape = `Target Range`
    ),
    # the fixed shape below takes precedence over the mapped shape
    size = 0.8, stroke = 0, shape = 16
  ) +
  #scale_shape_manual(values=c(3, 16, 17, 23), labels = c("0.25", "0.5", "0.75", "1")) +
  scale_colour_manual(
    values = c(
      "blue", "green", "lightgreen", "pink", "gold", "orange",
      "darkorange1", "chocolate", "cyan", "lightblue", "darkseagreen1",
      "darkseagreen1", "darkorchid", "red", "azure2", "burlywood1"
    ),
    labels = c(
      "0.75", "L1", "L2", "L3", "L4", "L5", "L6", "L7", "0.25", "0.5",
      "River1", "River2", "River3", "1", "Water", "Wetlands"
    ),
    name = "Height/Target"
  ) +
  ggtitle("Map Altitudes and Flood Risks: 7 Height levels and 4 Probabilities range risks")

  #guides(size = guide_legend(title='Risk Range'))

1.9 Target distribution

  # Number of squares per Target Range in 2015 (one row per square), plotted
  # as points in separate "No Risk" / "With Risk" panels with free scales.
  new_train %>%
    filter(Year == 2015) %>%
    distinct(Square_ID, .keep_all = TRUE) %>%
    mutate(
      wrap_mode = factor(if_else(Target == 0, "No Risk 2015", "With Risk 2015"))
    ) %>%
    group_by(`Target Range`, wrap_mode) %>%
    summarise(Frequency = n()) %>%
    ungroup() %>%
    ggplot(aes(x = `Target Range`, y = Frequency, colour = `Target Range`)) +
    geom_point() +
    facet_wrap(~ wrap_mode, ncol = 2, scales = "free") +
    scale_colour_manual(
      values = terrain.colors(6),
      labels = c(0, 0.25, 0.5, 0.75, 1)
    ) +
    ggtitle("Flood Risk Distribution")

#gridExtra::grid.arrange( p3, p2, nrow = 2)

We note:

  • The dataset is unbalanced: class 0 (no risk) dominates. We can reduce the size of class 0 by omitting water-body areas and the highest altitudes (> 2300 m).

  • The surface with the High risk range [0.75, 1] is the smallest class, followed by Middle, with probability in [0.5, 0.75].

  • The area with no risk covers most of the surface during the 2015 flood.

  • We expect these values to increase for the surfaces at risk if the pluviometry is higher during the 2019 flood.

2 Deal with unbalanced dataset

 # balanced_train <- new_train %>%
 #  filter(Year == 2015)  %>%
 #  #filter(Target== 0) %>%
 #  group_by(`Target Range`, Height) %>%
 #   sample_n(1000, replace = TRUE) %>%
 #  ungroup() %>%
 #  select(Longitude, Latitude, `Target Range`, `Altitude (m)`, Soil_type, Pluviometry, cum_Pluvio, Target, Height, SoilHeight_Weight)
 #   #filter(Target > 0) %>%
 #  # group_by(Height) %>%
 #  #summarise(n())
 # 
 # 
 #  balanced_train %>%
 #  mutate(wrap_mode = as.factor(if_else(Target == 0, "No Risk 2015", "With Risk 2015"))) %>%
 #  group_by(`Target Range`, wrap_mode) %>%
 #  summarise(Frequency = n()) %>%
 #  ungroup() %>%
 #  ggplot +
 #  aes(x = `Target Range`, y = Frequency, colour = `Target Range`) +
 #  geom_point()+
 #  facet_wrap(wrap_mode~ ., ncol = 2 , scales = "free") +
 # scale_colour_manual(values = terrain.colors(6),labels = c( 0, 0.25, 0.5, 0.75, 1))+
 #  ggtitle("Flood Risk Distribution (Balanced)") 

2.1 Flood Zone and Pluviometry: 2015 versus 2019

# Squares that have at least one row dated on or before 2015-03-15.
Square_ID_F <- new_train %>%
  filter(`Week of` <= "2015-03-15") %>%
  #filter(Target  > 0) %>%
  select(Square_ID) %>%
  unique()

# For those squares, keep the last week of each labelled year (where the
# cumulative rainfall is the season total) and map it: colour = cumulative
# rainfall, point size = Target, one panel per year.
new_train %>%
  filter(Square_ID %in% Square_ID_F$Square_ID) %>%
  group_by(Year) %>%
  filter(`Week of` == max(`Week of`)) %>%
  ungroup() %>%  # grouping is only needed for the per-year max above
  ggplot() +
  aes(x = Longitude, y = Latitude) +
  geom_point(aes(colour = cum_Pluvio, size = Target)) +  #  , size = target_2015
  # `values` gives one position per colour; the original passed six positions
  # for five colours. Five evenly spaced positions keep the same spacing.
  scale_colour_gradientn(
    colours = c("red", "darkblue", "blue", "lightblue", "white"),
    values = c(1, 0.75, 0.5, 0.25, 0)
  ) +
  #geom_point(data = subset(new_train, is.na(Target)), aes(size = 0, colour = cum_Pluvio)) +
  scale_size(name = "Target", breaks = c(0, 0.5, 1, 2)) +
  facet_wrap(Year ~ ., ncol = 2, scales = "free") +
  # Urban-zone boxes. annotate() draws each box once per panel, whereas
  # geom_rect(aes(<constants>)) would draw one copy per data row.
  annotate("rect", xmin = 34.95, xmax = 35.1, ymin = -15.71, ymax = -15.87,
           fill = "transparent", colour = "black", size = 0.5) +
  annotate("rect", xmin = 35.05, xmax = 35.15, ymin = -16.52, ymax = -16.64,
           fill = "transparent", colour = "black", size = 0.5) +
  annotate("rect", xmin = 34.8, xmax = 34.88, ymin = -15.98, ymax = -16.29,
           fill = "transparent", colour = "black", size = 0.5) +
  annotate("rect", xmin = 35.39, xmax = 35.41, ymin = -15.77, ymax = -15.85,
           fill = "transparent", colour = "black", size = 0.5) +
  annotate("rect", xmin = 35.38, xmax = 35.4, ymin = -15.24, ymax = -15.3,
           fill = "transparent", colour = "black", size = 0.5) +
  ggtitle("Flood Zone during 2015 and Cumulative Pluviometry Maps ")

  • Based only on cumulative pluviometry, we expect a larger flooded surface in 2019 than in 2015.

  • The cumulative pluviometry of the same zone during 2019 is about twice as high as that of 2015.

  • The flood seems to be more invasive: the water will reach higher altitudes and cover more area.

3 Random Forest modeling of Flood Zone Probability

# library(randomForest)
# train_2015 <- balanced_train %>%
#   #filter(Year == 2015) %>%
#   rename(Altitude = `Altitude (m)`) %>%
#   mutate(Altitude = log(Altitude)) %>%
#   mutate(Height = as.numeric(as.factor(Height))) %>%
#   select(Longitude, Latitude, Altitude, Target, Soil_type, cum_Pluvio, Pluviometry, Height, SoilHeight_Weight)
# 
# 
# rf_model <- randomForest(Target ~  Longitude +Latitude + Soil_type + cum_Pluvio + Altitude + Pluviometry + Height + SoilHeight_Weight + 
#                            Target_Range ,
#                          data = train_2015,
#                          ntree=500)
# 
# load("rf_model.RData")
# save(rf_model, file ="rf_model.RData")
# 
# summary(rf_model)
# train_2019 <- new_train %>%
#    filter(Year == 2019) %>%
#   rename(Altitude = `Altitude (m)`) %>%
#     mutate(Altitude = log(Altitude)) %>%
#     mutate(Height = as.numeric(as.factor(Height))) %>%
#   select(Longitude, Latitude,Altitude, -Target, Soil_type, cum_Pluvio, Pluviometry, Height, SoilHeight_Weight)
#  # select(-Target, Altitude,  Soil_type, cum_Pluvio, Pluviometry)
# 
# target_2019 <- round(predict(rf_model, train_2019, type="response"), digits = 2)
# 
# 
# 
# summary(target_2019)
# 
# 
# new_train[new_train$Year == 2019,]$Target <- target_2019

3.1 Visualize RF Prediction

# new_train %>%
#  ggplot() +
#   aes(x = Longitude, y = Latitude) +  #, colour = cum_Pluvio
#   geom_point(aes(colour = Target, size = Target)) + 
#   #scale_colour_gradient(low = "blue", high = "red") +
#   scale_colour_gradientn(colours = c("red","darkblue","blue","lightblue","white"),
#                          values = c(1.0,0.75,0.5,0.25,0)) +
#    scale_size(name =  "Target",
#              breaks = c(0, 0.25, 0.5, 0.75, 1)) +
#   facet_wrap(Year ~., ncol = 2) +
#     geom_rect(aes(xmin = 34.95, xmax = 35.1, ymin = -15.71, ymax =-15.87),
#                fill = "transparent", color = "black", size = 0.5) +
#    geom_rect(aes(xmin = 35.05, xmax = 35.15, ymin = -16.52, ymax =-16.64),
#                fill = "transparent", color = "black", size = 0.5) +
#      geom_rect(aes(xmin = 34.8, xmax = 34.88, ymin = -15.98, ymax =-16.29),
#                fill = "transparent", color = "black", size = 0.5) +
#    geom_rect(aes(xmin = 35.39, xmax = 35.41, ymin = -15.77, ymax =-15.85),
#                fill = "transparent", color = "black", size = 0.5) +
#      geom_rect(aes(xmin = 35.38, xmax = 35.4, ymin = -15.24, ymax =-15.3),
#                fill = "transparent", color = "black", size = 0.5)+
#   ggtitle("Flood zone prediction (RF) during 2019: Prob = ft(Cumulative Pluviometry, Altitude, Pluviometry, Soil_type)")
  • The Random Forest model does not give a good prediction. We expect a larger flood zone than in the 2015 flood.

  • The disaster seems to be more invasive: almost all of the south will be flooded, except zones with an altitude of more than 2000 m.

4 Caret

Very time consuming!

require(caret)

# set.seed(42)
# index <- createDataPartition(train_2015$Target, p = 0.7, list = FALSE)
# train_data <- train_2015[index, ]
# test_data  <- train_2015[-index, ]
# 
# set.seed(42)
# model_rf <- caret::train(Target ~ .,
#                          data = train_data,
#                          method = "rf",
#                          metric = "RMSE",
#                          preProcess = c("scale", "center"),
#                          trControl = trainControl(method = "repeatedcv", 
#                                                   number = 2, 
#                                                   repeats = 1, 
#                                                   verboseIter = TRUE)
#                          )
# 
# 
# save(model_rf, file ="model_caret_rf.RData")
# 
# final <- data.frame(actual = as.factor(as.character(test_data$Target)),
#                     pred = predict(model_rf, newdata = test_data, type = "raw"))
# 
# #final$predict <- as.factor(ifelse(final$pred >= 0.75, 1, 0))
# 
# #cm_original <- confusionMatrix(final$predict, final$actual)
# 
# target_2019 <- round(predict(model_rf, train_2019, type="raw"), digits = 2)
# 
# 
# 
# summary(target_2019)
# 
# 
# #new_train[new_train$Year == 2019,]$Target <- target_2019

5 Round Target to (0,1)

# Binarise the target (1 when Target >= 0.5, otherwise 0, stored as a factor)
# and add per-square rainfall summaries computed over all of a square's rows.
new_train01 <- new_train %>%
  mutate(Target = factor(if_else(Target >= 0.5, 1, 0))) %>%
  group_by(Square_ID) %>%
  mutate(
    meanPluv = mean(Pluviometry),
    medianPluv = median(Pluviometry),
    maxPluv = max(Pluviometry)
  ) %>%
  ungroup()

# Row counts of the two target classes among the 2015 rows.
new_train01 %>%
  filter(Year == 2015) %>%
  group_by(Target) %>%
  summarise(nbr = n()) %>%
  ggplot(aes(x = Target, y = nbr, fill = Target)) +
  geom_col()

# Map of the 2015 rows: height class underneath, binary target drawn on top as
# small points. Both layers share one manual colour scale whose `values` and
# `labels` are positional and must stay aligned with each other.
new_train01 %>%
  #mutate(Target = as.factor(Target)) %>%
  filter(Year == 2015) %>%
  ggplot(aes(x = Longitude, y = Latitude)) +
  geom_point(aes(color = Height)) +
  geom_point(
    #data = subset(new_train, Target == 1),
    aes(color = Target, shape = Target),
    # the fixed shape below takes precedence over the mapped shape
    size = 0.6, stroke = 0, shape = 19
  ) +
  scale_colour_manual(
    values = c(
      "lightblue", "red", "green", "lightgreen", "pink", "gold", "orange",
      "darkorange1", "chocolate", "darkseagreen1", "darkseagreen1",
      "darkorchid", "azure2", "burlywood1"
    ),
    labels = c(
      "0", "1", "L1", "L2", "L3", "L4", "L5", "L6", "L7", "River1",
      "River2", "River3", "Water", "Wetlands"
    ),
    name = "Height/Target"
  )

5.1 Add target Range variable to 2019

# Lookup table: one `Target Range` value per Square_ID, taken from the first
# 2015 row of each square.
Target_Range_vs_XY_2015 <- new_train01 %>%
  filter(Year == 2015) %>%
  select(Square_ID, `Target Range`) %>%
  distinct(Square_ID, .keep_all = TRUE)

# 2019 rows without their own `Target Range` column.
Train_2019 <- new_train01 %>%
  filter(Year == 2019) %>%
  select(-`Target Range`)

# Attach the 2015 value to each 2019 row. The lookup has one row per
# Square_ID, so the join keeps the 2019 rows in their original order.
Train_2019_TR <- left_join(
  Train_2019,
  Target_Range_vs_XY_2015,
  by = "Square_ID"
)

# Positional write-back: relies on the row order noted above.
new_train01[new_train01$Year == 2019, ]$`Target Range` <- Train_2019_TR$`Target Range`

head(new_train01[new_train01$Year == 2019, ])
## # A tibble: 6 x 22
##   Longitude Latitude `Altitude (m)` Soil_type Square_ID other `Week of` 
##       <dbl>    <dbl>          <dbl>     <int> <chr>     <chr> <date>    
## 1      34.3    -15.9           888.         9 4e3c3896  14ce… 2019-01-20
## 2      34.3    -15.9           743.         9 4e3c3897  14ce… 2019-01-20
## 3      34.3    -15.9           566.         9 4e3c3898  14ce… 2019-01-20
## 4      34.3    -15.9           443.        10 4e3c3899  14ce… 2019-01-20
## 5      34.3    -15.9           437.        10 4e3c389a  14ce… 2019-01-20
## 6      34.3    -15.9           406.        10 4e3c389b  14ce… 2019-01-20
## # … with 15 more variables: Pluviometry <dbl>, Week <dbl>, Year <dbl>,
## #   cum_Pluvio <dbl>, Target <fct>, Color <chr>, `Soil Name` <fct>,
## #   Description <chr>, `Target Range` <fct>, Height <chr>, wrap_mode <fct>,
## #   SoilHeight_Weight <dbl>, meanPluv <dbl>, medianPluv <dbl>, maxPluv <dbl>

5.2 Save Train2015 and test2019 dataset for Python

# Build the table exported for the Python section: keep the modelling
# columns (renamed to names without spaces), keep rows dated after
# 2015-03-30, log-transform altitude and turn the two categorical columns
# into zero-based numeric codes.
dataset2019 <- new_train01 %>%
  select(
    Longitude, Latitude,
    weeks = `Week of`,
    Week,
    Altitude = `Altitude (m)`,
    Soil_type, Square_ID, Pluviometry, cum_Pluvio, Target,
    Target_Range = `Target Range`,
    Height, SoilHeight_Weight, meanPluv, medianPluv, maxPluv
  ) %>%
  filter(weeks > "2015-03-30") %>%
  mutate(
    Altitude = log(Altitude),
    # Factor codes start at 1; subtract 1 so they start at 0.
    Target_Range = as.numeric(Target_Range) - 1,
    Height = as.numeric(as.factor(Height)) - 1
  )

fwrite(dataset2019, "dataset2019.csv")

head(dataset2019)
## # A tibble: 6 x 16
##   Longitude Latitude weeks       Week Altitude Soil_type Square_ID Pluviometry
##       <dbl>    <dbl> <date>     <dbl>    <dbl>     <int> <chr>           <dbl>
## 1      34.3    -15.9 2019-01-20     3     6.79         9 4e3c3896         13.0
## 2      34.3    -15.9 2019-01-20     3     6.61         9 4e3c3897         13.0
## 3      34.3    -15.9 2019-01-20     3     6.34         9 4e3c3898         13.0
## 4      34.3    -15.9 2019-01-20     3     6.09        10 4e3c3899         13.0
## 5      34.3    -15.9 2019-01-20     3     6.08        10 4e3c389a         13.0
## 6      34.3    -15.9 2019-01-20     3     6.01        10 4e3c389b         13.0
## # … with 8 more variables: cum_Pluvio <dbl>, Target <fct>, Target_Range <dbl>,
## #   Height <dbl>, SoilHeight_Weight <dbl>, meanPluv <dbl>, medianPluv <dbl>,
## #   maxPluv <dbl>

6 Animated Pluviometry of 2015 and 2019

# Weekly maps of cumulative rainfall for rows dated up to 2015-04-15,
# one facet per `Week of` value.
new_train %>%
  filter(`Week of` <= "2015-04-15") %>%
  ggplot() +
  aes(x = Longitude, y = Latitude, colour = cum_Pluvio) +
  geom_point(size = 4) +
  # `values` gives the position (0-1) of each colour, so it needs exactly
  # one entry per colour. Positions are listed from the top of the scale
  # (red) down to the bottom (white).
  scale_colour_gradientn(
    colours = c("red", "darkblue", "blue", "lightblue", "white"),
    values = c(1, 0.75, 0.5, 0.25, 0)
  ) +
  facet_wrap(`Week of` ~ ., ncol = 4) +
  ggtitle("Cumulative Pluviometry of 2015 before Flood")

# Weekly maps of cumulative rainfall for rows dated after 2015-03-15
# (titled as the 2019 series), one facet per `Week of` value.
new_train %>%
  filter(`Week of` > "2015-03-15") %>%
  ggplot() +
  aes(x = Longitude, y = Latitude, colour = cum_Pluvio) +
  geom_point(size = 4) +
  # `values` gives the position (0-1) of each colour, so it needs exactly
  # one entry per colour. Positions are listed from the top of the scale
  # (red) down to the bottom (white).
  scale_colour_gradientn(
    colours = c("red", "darkblue", "blue", "lightblue", "white"),
    values = c(1, 0.75, 0.5, 0.25, 0)
  ) +
  facet_wrap(`Week of` ~ ., ncol = 4) +
  ggtitle("Cumulative Pluviometry of 2019 before Flood")

6.1 Google Earth Engine Precipitation view

We used Google Earth Engine to view the pluviometry before the 2015 and 2019 floods around the south of Malawi.

6.2 Get animated Cumulative Pluviometry

# require(gganimate)
# 
# goo <- new_train %>%
#   filter(`Week of` <= "2015-03-15") %>%
#   ggplot() +
#   aes(x = Longitude, y = Latitude, colour = cum_Pluvio) +
#   geom_point(size = 4) +
#   #scale_colour_gradient(low = "blue", high = "red") +
#   scale_colour_gradientn(colours = c("blue","lightblue","white"),
#                          values = c(1.0,0.5,0)) +
#   #facet_wrap(`Week of` ~., ncol = 3 ) +
#   transition_time(`Week of`) +
#   labs(title = "Day: {frame_time}")
# 
# # foo <- new_train %>%
# #   filter(`Week of` > "2015-03-15") %>%
# #   ggplot() +
# #   aes(x = Longitude, y = Latitude, colour = Pluviometry) +
# #   geom_point(size = 4) +
# #   #facet_wrap(`Week of` ~., ncol = 3 ) +
# #   transition_time(`Week of`) +
# #   labs(title = "Day: {frame_time}")
# 
# 
# ## get and save animation
# nframes <- length(unique(goo$data$`Week of`)) * 7
# ani_goo <- animate(goo, nframes = nframes, fps = 10)
# png_files <- list.files(".", pattern = ".*png$", full.names = TRUE)
# require(gifski)
# #gifski(png_files, gif_file = "ani_goo.gif", width = 800, height = 600, delay = 1)
# #invisible(file.remove(png_files, ))
# 
# #animate(goo, renderer = ffmpeg_renderer(format = "webm"))
# #animate(foo, renderer = ffmpeg_renderer(format = "webm"))

link

# require(plotly)
# 
# new_train %>%
#   filter(`Week of` < "2014-12-14") %>%
#   plot_ly(
#     x = ~Longitude, 
#     y = ~Latitude, 
#     #size = ~pop, 
#     color = ~Pluviometry, 
#     frame = ~ as.Date(`Week of`), 
#     text = ~`Week of`, 
#     hoverinfo = "text",
#     type = 'scatter',
#     mode = 'markers'
#   ) %>%
#   layout(
#     xaxis = list(
#       type = "log"
#     )
#   )

7 Xgboost regression with Python

import matplotlib
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns
from sklearn.model_selection import train_test_split
from collections import Counter
# Load the two CSV exports into pandas: dataset2015.csv becomes `train`,
# dataset2019.csv becomes `test`.
train, test = (
    pd.read_csv(csv_path)
    for csv_path in ("dataset2015.csv", "dataset2019.csv")
)
train.head()
##    Longitude  Latitude       weeks  ...   meanPluv  medianPluv    maxPluv
## 0      34.26    -15.91  2014-11-16  ...  12.788037    4.689434  58.362456
## 1      34.26    -15.90  2014-11-16  ...  12.788037    4.689434  58.362456
## 2      34.26    -15.89  2014-11-16  ...  12.788037    4.689434  58.362456
## 3      34.26    -15.88  2014-11-16  ...  12.788037    4.689434  58.362456
## 4      34.26    -15.87  2014-11-16  ...  12.788037    4.689434  58.362456
## 
## [5 rows x 16 columns]
Counter(train['Target_Range'])
## Counter({0: 232917, 1: 19363, 4: 14178, 2: 8024, 3: 5440})

# Human-readable label for each Target_Range code.
dic_range = {0: "0", 1: "Low", 2: "medium", 3: "High", 4: "Sure"}

# Look the counts up by class code so each bar sits under its own label.
# A Counter iterates in first-seen order, which depends on the row order of
# `train` and need not be 0, 1, 2, 3, 4.
range_counts = Counter(train['Target_Range'])
plt.bar(range(len(dic_range)), [range_counts[code] for code in dic_range])
plt.xticks(range(len(dic_range)), list(dic_range.values()))
# Draw first, then show: plt.show() does not take an artist as an argument.
plt.show()


# split Train and Test


# Separate minors and major Target_Range
# Split the training rows by Target_Range class (codes 0-4).
train0 = train[train.Target_Range == 0]
train1 = train[train.Target_Range == 1]
train2 = train[train.Target_Range == 2]
train3 = train[train.Target_Range == 3]
train4 = train[train.Target_Range == 4]

from sklearn.utils import resample

# Class 1 is kept untouched and sets the common per-class size, so the
# size follows the data instead of being a hard-coded row count.
n_per_class = len(train1)

# Downsample class 0 (the largest class in this data) without replacement.
# NOTE: replace=False requires len(train0) >= n_per_class.
train_0_downsampled = resample(train0,
                               replace=False,
                               n_samples=n_per_class,
                               random_state=123)  # reproducible results

# Disabled alternative: shrink class 1 as well instead of keeping it whole.
# train_1_downsampled = resample(train1,
#                                replace=False,
#                                n_samples=8024,
#                                random_state=123)

# Upsample classes 2, 3 and 4 (each smaller than class 1 in this data) by
# sampling with replacement until they reach the common size.
train_2_upsampled = resample(train2,
                             replace=True,
                             n_samples=n_per_class,
                             random_state=123)

train_3_upsampled = resample(train3,
                             replace=True,
                             n_samples=n_per_class,
                             random_state=123)

train_4_upsampled = resample(train4,
                             replace=True,
                             n_samples=n_per_class,
                             random_state=123)

# Stack the five equally sized class subsets into one balanced frame.
train_balanced = pd.concat([train_0_downsampled, train1, train_2_upsampled, train_3_upsampled, train_4_upsampled])


Counter(train_balanced['Target_Range'])
## Counter({0: 19363, 1: 19363, 2: 19363, 3: 19363, 4: 19363})
train_balanced.columns
## Index(['Longitude', 'Latitude', 'weeks', 'Week', 'Altitude', 'Soil_type',
##        'Square_ID', 'Pluviometry', 'cum_Pluvio', 'Target', 'Target_Range',
##        'Height', 'SoilHeight_Weight', 'meanPluv', 'medianPluv', 'maxPluv'],
##       dtype='object')

# Label vector and feature matrices for the model.
# NOTE(review): these are built from `train`, not from `train_balanced`
# created above -- confirm that the unbalanced frame is intended.
ytrain = train['Target']

xtrain = train.drop(columns=['Target', 'Square_ID', 'weeks', 'Latitude', 'Longitude',
                             'meanPluv', 'medianPluv', 'maxPluv', 'Pluviometry'])

# NOTE(review): unlike xtrain, 'Square_ID' is not dropped here -- confirm
# that it is removed before the frame is passed to the model.
xtest = test.drop(columns=['Target', 'weeks', 'Latitude', 'Longitude',
                           'meanPluv', 'medianPluv', 'maxPluv', 'Pluviometry'])

## Dummy  variable levels is not better
# def prepare_data_for_model(raw_dataframe, target_columns, drop_first = True, make_na_col = False):
#     # dummy all categorical fields 
#     dataframe_dummy = pd.get_dummies(raw_dataframe, columns=target_columns, 
#                                      drop_first=drop_first, 
#                                      dummy_na=make_na_col)
#     return (dataframe_dummy)
#     
# 
# # create dummy features 
# xtrain_dum = prepare_data_for_model(xtrain, target_columns=['Soil_type', 'Height']) #, 'Nstage' : 3 classes
# xtrain_dum = xtrain_dum.dropna() 
# 
# # create dummy features for test
# xtest_dum = prepare_data_for_model(xtest, target_columns=['Soil_type', 'Height']) #, 'Nstage' : 4 classes
# xtest_dum = xtest_dum.dropna() 
# 
# xtrain_dum.head
from sklearn.model_selection import train_test_split
import xgboost as xgb
import time



## Split 
# Hold out 20% of the rows for validation, stratified on the label so both
# parts keep the same class proportions.
trn_x, val_x, trn_y, val_y = train_test_split(
    xtrain,
    ytrain,
    test_size=0.20,
    stratify=ytrain,
    random_state=42,
)
# Reference point for timing; process_time() measures CPU time of this
# process, not wall-clock time.
start_time = time.process_time()

# Gradient-boosted tree regressor with a logistic objective, so predictions
# stay in (0, 1). Other objectives tried: reg:squarederror, reg:linear,
# binary:logistic, multi:softmax, multi:softprob.
clf = xgb.XGBRegressor(booster = 'gbtree',
                      objective = 'reg:logistic',
                      max_depth = 10,
                      # Upper bound only; fitting is expected to stop earlier
                      # through early stopping on the validation set.
                      n_estimators = 10000,
                      min_child_weight = 9,
                      learning_rate = 0.01,
                      # n_jobs / random_state replace the deprecated
                      # nthread / seed aliases of the scikit-learn wrapper.
                      n_jobs = 8,
                      subsample = 0.80,
                      colsample_bytree = 0.80,
                      random_state = 4242)




# Fit on the training split and score every boosting round on the held-out
# split with RMSE; stop once that score has not improved for 10 rounds.
# Other metrics considered: logloss, mae, map, cox-nloglik.
clf.fit(
    trn_x,
    trn_y,
    eval_set=[(val_x, val_y)],
    eval_metric='rmse',
    early_stopping_rounds=10,
    verbose=True,  # print the validation score of every round
)
## [0]  validation_0-rmse:0.495411
## Will train until validation_0-rmse hasn't improved in 10 rounds.
## [1]  validation_0-rmse:0.490456
## [2]  validation_0-rmse:0.485552
## [3]  validation_0-rmse:0.480697
## [4]  validation_0-rmse:0.475898
## [5]  validation_0-rmse:0.471438
## [6]  validation_0-rmse:0.466731
## [7]  validation_0-rmse:0.462064
## [8]  validation_0-rmse:0.457445
## [9]  validation_0-rmse:0.452875
## [10] validation_0-rmse:0.448646
## [11] validation_0-rmse:0.444613
## [12] validation_0-rmse:0.440168
## [13] validation_0-rmse:0.435771
## [14] validation_0-rmse:0.431419
## [15] validation_0-rmse:0.427409
## [16] validation_0-rmse:0.423144
## [17] validation_0-rmse:0.418916
## [18] validation_0-rmse:0.414731
## [19] validation_0-rmse:0.410867
## [20] validation_0-rmse:0.407207
## [21] validation_0-rmse:0.403134
## [22] validation_0-rmse:0.399486
## [23] validation_0-rmse:0.395487
## [24] validation_0-rmse:0.391545
## [25] validation_0-rmse:0.387644
## [26] validation_0-rmse:0.383777
## [27] validation_0-rmse:0.379935
## [28] validation_0-rmse:0.376431
## [29] validation_0-rmse:0.372664
## [30] validation_0-rmse:0.368958
## [31] validation_0-rmse:0.36527
## [32] validation_0-rmse:0.361621
## [33] validation_0-rmse:0.358279
## [34] validation_0-rmse:0.354709
## [35] validation_0-rmse:0.351181
## [36] validation_0-rmse:0.347669
## [37] validation_0-rmse:0.344599
## [38] validation_0-rmse:0.341162
## [39] validation_0-rmse:0.338015
## [40] validation_0-rmse:0.334644
## [41] validation_0-rmse:0.331641
## [42] validation_0-rmse:0.328325
## [43] validation_0-rmse:0.325307
## [44] validation_0-rmse:0.322071
## [45] validation_0-rmse:0.319188
## [46] validation_0-rmse:0.316008
## [47] validation_0-rmse:0.313231
## [48] validation_0-rmse:0.310111
## [49] validation_0-rmse:0.307018
## [50] validation_0-rmse:0.303959
## [51] validation_0-rmse:0.300929
## [52] validation_0-rmse:0.298188
## [53] validation_0-rmse:0.295218
## [54] validation_0-rmse:0.292277
## [55] validation_0-rmse:0.289362
## [56] validation_0-rmse:0.28648
## [57] validation_0-rmse:0.28387
## [58] validation_0-rmse:0.281039
## [59] validation_0-rmse:0.278241
## [60] validation_0-rmse:0.275478
## [61] validation_0-rmse:0.272731
## [62] validation_0-rmse:0.270015
## [63] validation_0-rmse:0.267325
## [64] validation_0-rmse:0.264663
## [65] validation_0-rmse:0.26203
## [66] validation_0-rmse:0.259647
## [67] validation_0-rmse:0.257297
## [68] validation_0-rmse:0.254734
## [69] validation_0-rmse:0.252203
## [70] validation_0-rmse:0.249691
## [71] validation_0-rmse:0.24748
## [72] validation_0-rmse:0.245012
## [73] validation_0-rmse:0.242575
## [74] validation_0-rmse:0.240159
## [75] validation_0-rmse:0.23777
## [76] validation_0-rmse:0.235399
## [77] validation_0-rmse:0.233056
## [78] validation_0-rmse:0.230737
## [79] validation_0-rmse:0.228712
## [80] validation_0-rmse:0.226696
## [81] validation_0-rmse:0.224442
## [82] validation_0-rmse:0.222511
## [83] validation_0-rmse:0.220295
## [84] validation_0-rmse:0.218406
## [85] validation_0-rmse:0.216453
## [86] validation_0-rmse:0.214603
## [87] validation_0-rmse:0.212471
## [88] validation_0-rmse:0.210353
## [89] validation_0-rmse:0.208262
## [90] validation_0-rmse:0.206188
## [91] validation_0-rmse:0.20414
## [92] validation_0-rmse:0.202103
## [93] validation_0-rmse:0.200094
## [94] validation_0-rmse:0.198103
## [95] validation_0-rmse:0.19634
## [96] validation_0-rmse:0.194642
## [97] validation_0-rmse:0.192709
## [98] validation_0-rmse:0.190789
## [99] validation_0-rmse:0.189109
## [100]    validation_0-rmse:0.187226
## [101]    validation_0-rmse:0.185365
## [102]    validation_0-rmse:0.183518
## [103]    validation_0-rmse:0.181696
## [104]    validation_0-rmse:0.179884
## [105]    validation_0-rmse:0.178099
## [106]    validation_0-rmse:0.176528
## [107]    validation_0-rmse:0.174773
## [108]    validation_0-rmse:0.173033
## [109]    validation_0-rmse:0.171314
## [110]    validation_0-rmse:0.169611
## [111]    validation_0-rmse:0.167922
## [112]    validation_0-rmse:0.166251
## [113]    validation_0-rmse:0.164599
## [114]    validation_0-rmse:0.162962
## [115]    validation_0-rmse:0.161339
## [116]    validation_0-rmse:0.159735
## [117]    validation_0-rmse:0.158369
## [118]    validation_0-rmse:0.156982
## [119]    validation_0-rmse:0.155419
## [120]    validation_0-rmse:0.153876
## [121]    validation_0-rmse:0.152346
## [122]    validation_0-rmse:0.150833
## [123]    validation_0-rmse:0.14933
## [124]    validation_0-rmse:0.147845
## [125]    validation_0-rmse:0.146378
## [126]    validation_0-rmse:0.144921
## [127]    validation_0-rmse:0.143478
## [128]    validation_0-rmse:0.142261
## [129]    validation_0-rmse:0.140846
## [130]    validation_0-rmse:0.139446
## [131]    validation_0-rmse:0.13806
## [132]    validation_0-rmse:0.136687
## [133]    validation_0-rmse:0.135504
## [134]    validation_0-rmse:0.134156
## [135]    validation_0-rmse:0.132821
## [136]    validation_0-rmse:0.131705
## [137]    validation_0-rmse:0.130395
## [138]    validation_0-rmse:0.129271
## [139]    validation_0-rmse:0.128158
## [140]    validation_0-rmse:0.127062
## [141]    validation_0-rmse:0.125798
## [142]    validation_0-rmse:0.124549
## [143]    validation_0-rmse:0.123484
## [144]    validation_0-rmse:0.122256
## [145]    validation_0-rmse:0.121215
## [146]    validation_0-rmse:0.120011
## [147]    validation_0-rmse:0.11882
## [148]    validation_0-rmse:0.117636
## [149]    validation_0-rmse:0.116468
## [150]    validation_0-rmse:0.115309
## [151]    validation_0-rmse:0.114167
## [152]    validation_0-rmse:0.113033
## [153]    validation_0-rmse:0.111911
## [154]    validation_0-rmse:0.11096
## [155]    validation_0-rmse:0.10986
## [156]    validation_0-rmse:0.108766
## [157]    validation_0-rmse:0.107686
## [158]    validation_0-rmse:0.106779
## [159]    validation_0-rmse:0.105915
## [160]    validation_0-rmse:0.104862
## [161]    validation_0-rmse:0.103823
## [162]    validation_0-rmse:0.102791
## [163]    validation_0-rmse:0.101771
## [164]    validation_0-rmse:0.10076
## [165]    validation_0-rmse:0.099917
## [166]    validation_0-rmse:0.099109
## [167]    validation_0-rmse:0.098284
## [168]    validation_0-rmse:0.097308
## [169]    validation_0-rmse:0.096342
## [170]    validation_0-rmse:0.095387
## [171]    validation_0-rmse:0.094439
## [172]    validation_0-rmse:0.0935
## [173]    validation_0-rmse:0.092574
## [174]    validation_0-rmse:0.091657
## [175]    validation_0-rmse:0.090747
## [176]    validation_0-rmse:0.089847
## [177]    validation_0-rmse:0.088954
## [178]    validation_0-rmse:0.088071
## [179]    validation_0-rmse:0.087198
## [180]    validation_0-rmse:0.086332
## [181]    validation_0-rmse:0.085477
## [182]    validation_0-rmse:0.084628
## [183]    validation_0-rmse:0.083789
## [184]    validation_0-rmse:0.082956
## [185]    validation_0-rmse:0.082298
## [186]    validation_0-rmse:0.081482
## [187]    validation_0-rmse:0.080673
## [188]    validation_0-rmse:0.079871
## [189]    validation_0-rmse:0.079081
## [190]    validation_0-rmse:0.078297
## [191]    validation_0-rmse:0.07768
## [192]    validation_0-rmse:0.076908
## [193]    validation_0-rmse:0.076146
## [194]    validation_0-rmse:0.07539
## [195]    validation_0-rmse:0.074641
## [196]    validation_0-rmse:0.074031
## [197]    validation_0-rmse:0.07343
## [198]    validation_0-rmse:0.072702
## [199]    validation_0-rmse:0.071981
## [200]    validation_0-rmse:0.071399
## [201]    validation_0-rmse:0.070844
## [202]    validation_0-rmse:0.070142
## [203]    validation_0-rmse:0.069446
## [204]    validation_0-rmse:0.068758
## [205]    validation_0-rmse:0.068076
## [206]    validation_0-rmse:0.067401
## [207]    validation_0-rmse:0.066732
## [208]    validation_0-rmse:0.066069
## [209]    validation_0-rmse:0.065415
## [210]    validation_0-rmse:0.064765
## [211]    validation_0-rmse:0.064122
## [212]    validation_0-rmse:0.063627
## [213]    validation_0-rmse:0.062995
## [214]    validation_0-rmse:0.062371
## [215]    validation_0-rmse:0.061753
## [216]    validation_0-rmse:0.061139
## [217]    validation_0-rmse:0.060534
## [218]    validation_0-rmse:0.059934
## [219]    validation_0-rmse:0.059474
## [220]    validation_0-rmse:0.058885
## [221]    validation_0-rmse:0.058301
## [222]    validation_0-rmse:0.057722
## [223]    validation_0-rmse:0.057283
## [224]    validation_0-rmse:0.056833
## [225]    validation_0-rmse:0.056269
## [226]    validation_0-rmse:0.05571
## [227]    validation_0-rmse:0.055159
## [228]    validation_0-rmse:0.054612
## [229]    validation_0-rmse:0.05407
## [230]    validation_0-rmse:0.053534
## [231]    validation_0-rmse:0.053003
## [232]    validation_0-rmse:0.052477
## [233]    validation_0-rmse:0.051956
## [234]    validation_0-rmse:0.051442
## [235]    validation_0-rmse:0.050931
## [236]    validation_0-rmse:0.050426
## [237]    validation_0-rmse:0.050051
## [238]    validation_0-rmse:0.049554
## [239]    validation_0-rmse:0.049063
## [240]    validation_0-rmse:0.048681
## [241]    validation_0-rmse:0.048324
## [242]    validation_0-rmse:0.047844
## [243]    validation_0-rmse:0.04737
## [244]    validation_0-rmse:0.0469
## [245]    validation_0-rmse:0.046435
## [246]    validation_0-rmse:0.045975
## [247]    validation_0-rmse:0.045519
## [248]    validation_0-rmse:0.045067
## [249]    validation_0-rmse:0.044721
## [250]    validation_0-rmse:0.044278
## [251]    validation_0-rmse:0.043838
## [252]    validation_0-rmse:0.043405
## [253]    validation_0-rmse:0.042974
## [254]    validation_0-rmse:0.042548
## [255]    validation_0-rmse:0.042126
## [256]    validation_0-rmse:0.041709
## [257]    validation_0-rmse:0.041295
## [258]    validation_0-rmse:0.040885
## [259]    validation_0-rmse:0.04048
## [260]    validation_0-rmse:0.040078
## [261]    validation_0-rmse:0.03968
## [262]    validation_0-rmse:0.039287
## [263]    validation_0-rmse:0.038988
## [264]    validation_0-rmse:0.038601
## [265]    validation_0-rmse:0.038219
## [266]    validation_0-rmse:0.03784
## [267]    validation_0-rmse:0.037465
## [268]    validation_0-rmse:0.037093
## [269]    validation_0-rmse:0.036813
## [270]    validation_0-rmse:0.03655
## [271]    validation_0-rmse:0.036189
## [272]    validation_0-rmse:0.03583
## [273]    validation_0-rmse:0.035561
## [274]    validation_0-rmse:0.035306
## [275]    validation_0-rmse:0.034957
## [276]    validation_0-rmse:0.03461
## [277]    validation_0-rmse:0.034267
## [278]    validation_0-rmse:0.033927
## [279]    validation_0-rmse:0.033677
## [280]    validation_0-rmse:0.033445
## [281]    validation_0-rmse:0.033113
## [282]    validation_0-rmse:0.032785
## [283]    validation_0-rmse:0.03246
## [284]    validation_0-rmse:0.032138
## [285]    validation_0-rmse:0.031819
## [286]    validation_0-rmse:0.031504
## [287]    validation_0-rmse:0.031191
## [288]    validation_0-rmse:0.030883
## [289]    validation_0-rmse:0.030577
## [290]    validation_0-rmse:0.030274
## [291]    validation_0-rmse:0.03006
## [292]    validation_0-rmse:0.029843
## [293]    validation_0-rmse:0.029629
## [294]    validation_0-rmse:0.029424
## [295]    validation_0-rmse:0.029133
## [296]    validation_0-rmse:0.028844
## [297]    validation_0-rmse:0.028558
## [298]    validation_0-rmse:0.028355
## [299]    validation_0-rmse:0.028156
## [300]    validation_0-rmse:0.027877
## [301]    validation_0-rmse:0.027682
## [302]    validation_0-rmse:0.027408
## [303]    validation_0-rmse:0.027136
## [304]    validation_0-rmse:0.026867
## [305]    validation_0-rmse:0.026601
## [306]    validation_0-rmse:0.026417
## [307]    validation_0-rmse:0.026155
## [308]    validation_0-rmse:0.025981
## [309]    validation_0-rmse:0.025804
## [310]    validation_0-rmse:0.025548
## [311]    validation_0-rmse:0.025295
## [312]    validation_0-rmse:0.025044
## [313]    validation_0-rmse:0.024875
## [314]    validation_0-rmse:0.024719
## [315]    validation_0-rmse:0.024475
## [316]    validation_0-rmse:0.024323
## [317]    validation_0-rmse:0.024083
## [318]    validation_0-rmse:0.023844
## [319]    validation_0-rmse:0.023686
## [320]    validation_0-rmse:0.023452
## [321]    validation_0-rmse:0.023303
## [322]    validation_0-rmse:0.023072
## [323]    validation_0-rmse:0.022843
## [324]    validation_0-rmse:0.022618
## [325]    validation_0-rmse:0.022394
## [326]    validation_0-rmse:0.022252
## [327]    validation_0-rmse:0.022032
## [328]    validation_0-rmse:0.021814
## [329]    validation_0-rmse:0.021684
## [330]    validation_0-rmse:0.021469
## [331]    validation_0-rmse:0.021331
## [332]    validation_0-rmse:0.02112
## [333]    validation_0-rmse:0.020911
## [334]    validation_0-rmse:0.02079
## [335]    validation_0-rmse:0.020584
## [336]    validation_0-rmse:0.020454
## [337]    validation_0-rmse:0.020331
## [338]    validation_0-rmse:0.020129
## [339]    validation_0-rmse:0.020009
## [340]    validation_0-rmse:0.01989
## [341]    validation_0-rmse:0.019693
## [342]    validation_0-rmse:0.019573
## [343]    validation_0-rmse:0.01938
## [344]    validation_0-rmse:0.019188
## [345]    validation_0-rmse:0.018998
## [346]    validation_0-rmse:0.01881
## [347]    validation_0-rmse:0.018624
## [348]    validation_0-rmse:0.01844
## [349]    validation_0-rmse:0.018257
## [350]    validation_0-rmse:0.018077
## [351]    validation_0-rmse:0.017898
## [352]    validation_0-rmse:0.017721
## [353]    validation_0-rmse:0.017546
## [354]    validation_0-rmse:0.01744
## [355]    validation_0-rmse:0.017268
## [356]    validation_0-rmse:0.017097
## [357]    validation_0-rmse:0.016995
## [358]    validation_0-rmse:0.016895
## [359]    validation_0-rmse:0.016729
## [360]    validation_0-rmse:0.016631
## [361]    validation_0-rmse:0.016467
## [362]    validation_0-rmse:0.016304
## [363]    validation_0-rmse:0.016212
## [364]    validation_0-rmse:0.016052
## [365]    validation_0-rmse:0.015893
## [366]    validation_0-rmse:0.015736
## [367]    validation_0-rmse:0.015649
## [368]    validation_0-rmse:0.015494
## [369]    validation_0-rmse:0.015341
## [370]    validation_0-rmse:0.015257
## [371]    validation_0-rmse:0.015181
## [372]    validation_0-rmse:0.015031
## [373]    validation_0-rmse:0.014882
## [374]    validation_0-rmse:0.014735
## [375]    validation_0-rmse:0.01459
## [376]    validation_0-rmse:0.014445
## [377]    validation_0-rmse:0.014302
## [378]    validation_0-rmse:0.014233
## [379]    validation_0-rmse:0.014092
## [380]    validation_0-rmse:0.013953
## [381]    validation_0-rmse:0.013877
## [382]    validation_0-rmse:0.01374
## [383]    validation_0-rmse:0.013604
## [384]    validation_0-rmse:0.013469
## [385]    validation_0-rmse:0.013336
## [386]    validation_0-rmse:0.013264
## [387]    validation_0-rmse:0.013133
## [388]    validation_0-rmse:0.013003
## [389]    validation_0-rmse:0.012874
## [390]    validation_0-rmse:0.012807
## [391]    validation_0-rmse:0.01274
## [392]    validation_0-rmse:0.012673
## [393]    validation_0-rmse:0.012548
## [394]    validation_0-rmse:0.012424
## [395]    validation_0-rmse:0.012359
## [396]    validation_0-rmse:0.012237
## [397]    validation_0-rmse:0.012175
## [398]    validation_0-rmse:0.012055
## [399]    validation_0-rmse:0.011936
## [400]    validation_0-rmse:0.011818
## [401]    validation_0-rmse:0.011701
## [402]    validation_0-rmse:0.011585
## [403]    validation_0-rmse:0.011471
## [404]    validation_0-rmse:0.011357
## [405]    validation_0-rmse:0.011301
## [406]    validation_0-rmse:0.011246
## [407]    validation_0-rmse:0.011135
## [408]    validation_0-rmse:0.01108
## [409]    validation_0-rmse:0.010971
## [410]    validation_0-rmse:0.010862
## [411]    validation_0-rmse:0.010755
## [412]    validation_0-rmse:0.010703
## [413]    validation_0-rmse:0.010597
## [414]    validation_0-rmse:0.010493
## [415]    validation_0-rmse:0.010389
## [416]    validation_0-rmse:0.010286
## [417]    validation_0-rmse:0.010185
## [418]    validation_0-rmse:0.010084
## [419]    validation_0-rmse:0.009984
## [420]    validation_0-rmse:0.009886
## [421]    validation_0-rmse:0.00984
## [422]    validation_0-rmse:0.009742
## [423]    validation_0-rmse:0.009697
## [424]    validation_0-rmse:0.009601
## [425]    validation_0-rmse:0.009506
## [426]    validation_0-rmse:0.009412
## [427]    validation_0-rmse:0.009319
## [428]    validation_0-rmse:0.009276
## [429]    validation_0-rmse:0.009185
## [430]    validation_0-rmse:0.009094
## [431]    validation_0-rmse:0.009004
## [432]    validation_0-rmse:0.008915
## [433]    validation_0-rmse:0.008827
## [434]    validation_0-rmse:0.008787
## [435]    validation_0-rmse:0.0087
## [436]    validation_0-rmse:0.008614
## [437]    validation_0-rmse:0.008529
## [438]    validation_0-rmse:0.008444
## [439]    validation_0-rmse:0.008361
## [440]    validation_0-rmse:0.008278
## [441]    validation_0-rmse:0.008197
## [442]    validation_0-rmse:0.008116
## [443]    validation_0-rmse:0.008036
## [444]    validation_0-rmse:0.007956
## [445]    validation_0-rmse:0.007877
## [446]    validation_0-rmse:0.007843
## [447]    validation_0-rmse:0.007765
## [448]    validation_0-rmse:0.007731
## [449]    validation_0-rmse:0.007655
## [450]    validation_0-rmse:0.007579
## [451]    validation_0-rmse:0.007504
## [452]    validation_0-rmse:0.00743
## [453]    validation_0-rmse:0.007357
## [454]    validation_0-rmse:0.007284
## [455]    validation_0-rmse:0.007253
## [456]    validation_0-rmse:0.007222
## [457]    validation_0-rmse:0.00715
## [458]    validation_0-rmse:0.00712
## [459]    validation_0-rmse:0.007096
## [460]    validation_0-rmse:0.007026
## [461]    validation_0-rmse:0.006957
## [462]    validation_0-rmse:0.006888
## [463]    validation_0-rmse:0.00682
## [464]    validation_0-rmse:0.006792
## [465]    validation_0-rmse:0.006725
## [466]    validation_0-rmse:0.006659
## [467]    validation_0-rmse:0.006593
## [468]    validation_0-rmse:0.006567
## [469]    validation_0-rmse:0.006502
## [470]    validation_0-rmse:0.006438
## [471]    validation_0-rmse:0.006374
## [472]    validation_0-rmse:0.006311
## [473]    validation_0-rmse:0.006249
## [474]    validation_0-rmse:0.006187
## [475]    validation_0-rmse:0.006162
## [476]    validation_0-rmse:0.006102
## [477]    validation_0-rmse:0.006078
## [478]    validation_0-rmse:0.00606
## [479]    validation_0-rmse:0.006042
## [480]    validation_0-rmse:0.005983
## [481]    validation_0-rmse:0.005961
## [482]    validation_0-rmse:0.005902
## [483]    validation_0-rmse:0.005843
## [484]    validation_0-rmse:0.005786
## [485]    validation_0-rmse:0.005729
## [486]    validation_0-rmse:0.005672
## [487]    validation_0-rmse:0.005616
## [488]    validation_0-rmse:0.005561
## [489]    validation_0-rmse:0.00554
## [490]    validation_0-rmse:0.005486
## [491]    validation_0-rmse:0.005431
## [492]    validation_0-rmse:0.005417
## [493]    validation_0-rmse:0.005363
## [494]    validation_0-rmse:0.00531
## [495]    validation_0-rmse:0.005258
## [496]    validation_0-rmse:0.005239
## [497]    validation_0-rmse:0.005187
## [498]    validation_0-rmse:0.005136
## [499]    validation_0-rmse:0.005085
## [500]    validation_0-rmse:0.005068
## [501]    validation_0-rmse:0.005018
## [502]    validation_0-rmse:0.005001
## [503]    validation_0-rmse:0.004952
## [504]    validation_0-rmse:0.004903
## [505]    validation_0-rmse:0.004886
## [506]    validation_0-rmse:0.004838
## [507]    validation_0-rmse:0.00479
## [508]    validation_0-rmse:0.004743
## [509]    validation_0-rmse:0.004696
## [510]    validation_0-rmse:0.004681
## [511]    validation_0-rmse:0.004635
## [512]    validation_0-rmse:0.004589
## [513]    validation_0-rmse:0.004574
## [514]    validation_0-rmse:0.004529
## [515]    validation_0-rmse:0.004515
## [516]    validation_0-rmse:0.00447
## [517]    validation_0-rmse:0.004426
## [518]    validation_0-rmse:0.004383
## [519]    validation_0-rmse:0.00434
## [520]    validation_0-rmse:0.004297
## [521]    validation_0-rmse:0.004254
## [522]    validation_0-rmse:0.004241
## [523]    validation_0-rmse:0.0042
## [524]    validation_0-rmse:0.004158
## [525]    validation_0-rmse:0.004117
## [526]    validation_0-rmse:0.004076
## [527]    validation_0-rmse:0.004064
## [528]    validation_0-rmse:0.004024
## [529]    validation_0-rmse:0.003985
## [530]    validation_0-rmse:0.003976
## [531]    validation_0-rmse:0.003937
## [532]    validation_0-rmse:0.003898
## [533]    validation_0-rmse:0.00386
## [534]    validation_0-rmse:0.003822
## [535]    validation_0-rmse:0.003784
## [536]    validation_0-rmse:0.003747
## [537]    validation_0-rmse:0.003736
## [538]    validation_0-rmse:0.003699
## [539]    validation_0-rmse:0.003663
## [540]    validation_0-rmse:0.003627
## [541]    validation_0-rmse:0.003591
## [542]    validation_0-rmse:0.003556
## [543]    validation_0-rmse:0.003521
## [544]    validation_0-rmse:0.003486
## [545]    validation_0-rmse:0.003476
## [546]    validation_0-rmse:0.003442
## [547]    validation_0-rmse:0.003432
## [548]    validation_0-rmse:0.003423
## [549]    validation_0-rmse:0.003414
## [550]    validation_0-rmse:0.00338
## [551]    validation_0-rmse:0.003347
## [552]    validation_0-rmse:0.003338
## [553]    validation_0-rmse:0.003305
## [554]    validation_0-rmse:0.003296
## [555]    validation_0-rmse:0.003288
## [556]    validation_0-rmse:0.003255
## [557]    validation_0-rmse:0.003223
## [558]    validation_0-rmse:0.003192
## [559]    validation_0-rmse:0.003184
## [560]    validation_0-rmse:0.003176
## [561]    validation_0-rmse:0.003145
## [562]    validation_0-rmse:0.003113
## [563]    validation_0-rmse:0.003083
## [564]    validation_0-rmse:0.003052
## [565]    validation_0-rmse:0.003045
## [566]    validation_0-rmse:0.003015
## [567]    validation_0-rmse:0.002986
## [568]    validation_0-rmse:0.002979
## [569]    validation_0-rmse:0.002949
## [570]    validation_0-rmse:0.002943
## [571]    validation_0-rmse:0.002936
## [572]    validation_0-rmse:0.002907
## [573]    validation_0-rmse:0.002879
## [574]    validation_0-rmse:0.00285
## [575]    validation_0-rmse:0.002844
## [576]    validation_0-rmse:0.002838
## [577]    validation_0-rmse:0.00281
## [578]    validation_0-rmse:0.002782
## [579]    validation_0-rmse:0.002755
## [580]    validation_0-rmse:0.002728
## [581]    validation_0-rmse:0.002701
## [582]    validation_0-rmse:0.002675
## [583]    validation_0-rmse:0.002672
## [584]    validation_0-rmse:0.002646
## [585]    validation_0-rmse:0.00262
## [586]    validation_0-rmse:0.002594
## [587]    validation_0-rmse:0.002569
## [588]    validation_0-rmse:0.002543
## [589]    validation_0-rmse:0.002538
## [590]    validation_0-rmse:0.002513
## [591]    validation_0-rmse:0.002488
## [592]    validation_0-rmse:0.002483
## [593]    validation_0-rmse:0.002459
## [594]    validation_0-rmse:0.002435
## [595]    validation_0-rmse:0.002411
## [596]    validation_0-rmse:0.002387
## [597]    validation_0-rmse:0.002364
## [598]    validation_0-rmse:0.002362
## [599]    validation_0-rmse:0.002339
## [600]    validation_0-rmse:0.002316
## [601]    validation_0-rmse:0.002293
## [602]    validation_0-rmse:0.00227
## [603]    validation_0-rmse:0.002266
## [604]    validation_0-rmse:0.002244
## [605]    validation_0-rmse:0.002222
## [606]    validation_0-rmse:0.0022
## [607]    validation_0-rmse:0.002178
## [608]    validation_0-rmse:0.002174
## [609]    validation_0-rmse:0.002153
## [610]    validation_0-rmse:0.002132
## [611]    validation_0-rmse:0.002128
## [612]    validation_0-rmse:0.002107
## [613]    validation_0-rmse:0.002086
## [614]    validation_0-rmse:0.002083
## [615]    validation_0-rmse:0.002062
## [616]    validation_0-rmse:0.002042
## [617]    validation_0-rmse:0.002022
## [618]    validation_0-rmse:0.002018
## [619]    validation_0-rmse:0.001999
## [620]    validation_0-rmse:0.001995
## [621]    validation_0-rmse:0.001976
## [622]    validation_0-rmse:0.001956
## [623]    validation_0-rmse:0.001953
## [624]    validation_0-rmse:0.001934
## [625]    validation_0-rmse:0.001915
## [626]    validation_0-rmse:0.001896
## [627]    validation_0-rmse:0.001877
## [628]    validation_0-rmse:0.001859
## [629]    validation_0-rmse:0.001841
## [630]    validation_0-rmse:0.001823
## [631]    validation_0-rmse:0.001805
## [632]    validation_0-rmse:0.001787
## [633]    validation_0-rmse:0.00177
## [634]    validation_0-rmse:0.001752
## [635]    validation_0-rmse:0.001735
## [636]    validation_0-rmse:0.001718
## [637]    validation_0-rmse:0.001701
## [638]    validation_0-rmse:0.001685
## [639]    validation_0-rmse:0.001684
## [640]    validation_0-rmse:0.001668
## [641]    validation_0-rmse:0.001651
## [642]    validation_0-rmse:0.001635
## [643]    validation_0-rmse:0.001619
## [644]    validation_0-rmse:0.001603
## [645]    validation_0-rmse:0.001601
## [646]    validation_0-rmse:0.001598
## [647]    validation_0-rmse:0.001583
## [648]    validation_0-rmse:0.001567
## [649]    validation_0-rmse:0.001552
## [650]    validation_0-rmse:0.001537
## [651]    validation_0-rmse:0.001522
## [652]    validation_0-rmse:0.001507
## [653]    validation_0-rmse:0.001492
## [654]    validation_0-rmse:0.001478
## [655]    validation_0-rmse:0.001463
## [656]    validation_0-rmse:0.001449
## [657]    validation_0-rmse:0.001435
## [658]    validation_0-rmse:0.001421
## [659]    validation_0-rmse:0.001418
## [660]    validation_0-rmse:0.001405
## [661]    validation_0-rmse:0.001391
## [662]    validation_0-rmse:0.001377
## [663]    validation_0-rmse:0.001364
## [664]    validation_0-rmse:0.001351
## [665]    validation_0-rmse:0.001337
## [666]    validation_0-rmse:0.001324
## [667]    validation_0-rmse:0.001312
## [668]    validation_0-rmse:0.001299
## [669]    validation_0-rmse:0.001286
## [670]    validation_0-rmse:0.001274
## [671]    validation_0-rmse:0.001261
## [672]    validation_0-rmse:0.001249
## [673]    validation_0-rmse:0.001237
## [674]    validation_0-rmse:0.001225
## [675]    validation_0-rmse:0.001213
## [676]    validation_0-rmse:0.001211
## [677]    validation_0-rmse:0.001209
## [678]    validation_0-rmse:0.001207
## [679]    validation_0-rmse:0.001206
## [680]    validation_0-rmse:0.001194
## [681]    validation_0-rmse:0.001192
## [682]    validation_0-rmse:0.001191
## [683]    validation_0-rmse:0.001179
## [684]    validation_0-rmse:0.001177
## [685]    validation_0-rmse:0.001178
## [686]    validation_0-rmse:0.001166
## [687]    validation_0-rmse:0.001155
## [688]    validation_0-rmse:0.001153
## [689]    validation_0-rmse:0.001152
## [690]    validation_0-rmse:0.001151
## [691]    validation_0-rmse:0.001139
## [692]    validation_0-rmse:0.001128
## [693]    validation_0-rmse:0.001117
## [694]    validation_0-rmse:0.001106
## [695]    validation_0-rmse:0.001096
## [696]    validation_0-rmse:0.001085
## [697]    validation_0-rmse:0.001075
## [698]    validation_0-rmse:0.001064
## [699]    validation_0-rmse:0.001063
## [700]    validation_0-rmse:0.001053
## [701]    validation_0-rmse:0.001042
## [702]    validation_0-rmse:0.001032
## [703]    validation_0-rmse:0.001031
## [704]    validation_0-rmse:0.001032
## [705]    validation_0-rmse:0.001022
## [706]    validation_0-rmse:0.001012
## [707]    validation_0-rmse:0.001011
## [708]    validation_0-rmse:0.001001
## [709]    validation_0-rmse:0.001
## [710]    validation_0-rmse:0.00099
## [711]    validation_0-rmse:0.00098
## [712]    validation_0-rmse:0.000971
## [713]    validation_0-rmse:0.00097
## [714]    validation_0-rmse:0.000969
## [715]    validation_0-rmse:0.000968
## [716]    validation_0-rmse:0.000959
## [717]    validation_0-rmse:0.000949
## [718]    validation_0-rmse:0.000948
## [719]    validation_0-rmse:0.000949
## [720]    validation_0-rmse:0.00094
## [721]    validation_0-rmse:0.000931
## [722]    validation_0-rmse:0.000922
## [723]    validation_0-rmse:0.000913
## [724]    validation_0-rmse:0.000912
## [725]    validation_0-rmse:0.000903
## [726]    validation_0-rmse:0.000895
## [727]    validation_0-rmse:0.000886
## [728]    validation_0-rmse:0.000877
## [729]    validation_0-rmse:0.000869
## [730]    validation_0-rmse:0.00086
## [731]    validation_0-rmse:0.00086
## [732]    validation_0-rmse:0.000851
## [733]    validation_0-rmse:0.000843
## [734]    validation_0-rmse:0.000835
## [735]    validation_0-rmse:0.000827
## [736]    validation_0-rmse:0.000828
## [737]    validation_0-rmse:0.00082
## [738]    validation_0-rmse:0.000819
## [739]    validation_0-rmse:0.000811
## [740]    validation_0-rmse:0.000803
## [741]    validation_0-rmse:0.000796
## [742]    validation_0-rmse:0.000788
## [743]    validation_0-rmse:0.00078
## [744]    validation_0-rmse:0.000773
## [745]    validation_0-rmse:0.000774
## [746]    validation_0-rmse:0.000773
## [747]    validation_0-rmse:0.000766
## [748]    validation_0-rmse:0.000758
## [749]    validation_0-rmse:0.000751
## [750]    validation_0-rmse:0.000744
## [751]    validation_0-rmse:0.000737
## [752]    validation_0-rmse:0.00073
## [753]    validation_0-rmse:0.000723
## [754]    validation_0-rmse:0.000723
## [755]    validation_0-rmse:0.000717
## [756]    validation_0-rmse:0.000716
## [757]    validation_0-rmse:0.000709
## [758]    validation_0-rmse:0.000702
## [759]    validation_0-rmse:0.000696
## [760]    validation_0-rmse:0.000689
## [761]    validation_0-rmse:0.000682
## [762]    validation_0-rmse:0.000683
## [763]    validation_0-rmse:0.000683
## [764]    validation_0-rmse:0.000683
## [765]    validation_0-rmse:0.000677
## [766]    validation_0-rmse:0.00067
## [767]    validation_0-rmse:0.000664
## [768]    validation_0-rmse:0.000658
## [769]    validation_0-rmse:0.000659
## [770]    validation_0-rmse:0.000652
## [771]    validation_0-rmse:0.000646
## [772]    validation_0-rmse:0.00064
## [773]    validation_0-rmse:0.000634
## [774]    validation_0-rmse:0.000633
## [775]    validation_0-rmse:0.000627
## [776]    validation_0-rmse:0.000627
## [777]    validation_0-rmse:0.000621
## [778]    validation_0-rmse:0.00062
## [779]    validation_0-rmse:0.000614
## [780]    validation_0-rmse:0.000609
## [781]    validation_0-rmse:0.000603
## [782]    validation_0-rmse:0.000604
## [783]    validation_0-rmse:0.000603
## [784]    validation_0-rmse:0.000603
## [785]    validation_0-rmse:0.000597
## [786]    validation_0-rmse:0.000591
## [787]    validation_0-rmse:0.000591
## [788]    validation_0-rmse:0.000585
## [789]    validation_0-rmse:0.00058
## [790]    validation_0-rmse:0.000574
## [791]    validation_0-rmse:0.000574
## [792]    validation_0-rmse:0.000568
## [793]    validation_0-rmse:0.000563
## [794]    validation_0-rmse:0.000558
## [795]    validation_0-rmse:0.000552
## [796]    validation_0-rmse:0.000547
## [797]    validation_0-rmse:0.000542
## [798]    validation_0-rmse:0.000537
## [799]    validation_0-rmse:0.000532
## [800]    validation_0-rmse:0.000527
## [801]    validation_0-rmse:0.000526
## [802]    validation_0-rmse:0.000526
## [803]    validation_0-rmse:0.000521
## [804]    validation_0-rmse:0.000521
## [805]    validation_0-rmse:0.000516
## [806]    validation_0-rmse:0.000515
## [807]    validation_0-rmse:0.000511
## [808]    validation_0-rmse:0.000506
## [809]    validation_0-rmse:0.000501
## [810]    validation_0-rmse:0.000496
## [811]    validation_0-rmse:0.000492
## [812]    validation_0-rmse:0.000492
## [813]    validation_0-rmse:0.000488
## [814]    validation_0-rmse:0.000483
## [815]    validation_0-rmse:0.000483
## [816]    validation_0-rmse:0.000484
## [817]    validation_0-rmse:0.000479
## [818]    validation_0-rmse:0.000474
## [819]    validation_0-rmse:0.00047
## [820]    validation_0-rmse:0.000471
## [821]    validation_0-rmse:0.000471
## [822]    validation_0-rmse:0.000467
## [823]    validation_0-rmse:0.000463
## [824]    validation_0-rmse:0.000458
## [825]    validation_0-rmse:0.000454
## [826]    validation_0-rmse:0.00045
## [827]    validation_0-rmse:0.000446
## [828]    validation_0-rmse:0.000441
## [829]    validation_0-rmse:0.000441
## [830]    validation_0-rmse:0.000437
## [831]    validation_0-rmse:0.000433
## [832]    validation_0-rmse:0.000429
## [833]    validation_0-rmse:0.000425
## [834]    validation_0-rmse:0.000424
## [835]    validation_0-rmse:0.00042
## [836]    validation_0-rmse:0.000417
## [837]    validation_0-rmse:0.000413
## [838]    validation_0-rmse:0.000409
## [839]    validation_0-rmse:0.000408
## [840]    validation_0-rmse:0.000405
## [841]    validation_0-rmse:0.000404
## [842]    validation_0-rmse:0.000404
## [843]    validation_0-rmse:0.0004
## [844]    validation_0-rmse:0.0004
## [845]    validation_0-rmse:0.000396
## [846]    validation_0-rmse:0.000393
## [847]    validation_0-rmse:0.000389
## [848]    validation_0-rmse:0.00039
## [849]    validation_0-rmse:0.000386
## [850]    validation_0-rmse:0.000386
## [851]    validation_0-rmse:0.000382
## [852]    validation_0-rmse:0.000379
## [853]    validation_0-rmse:0.000379
## [854]    validation_0-rmse:0.000375
## [855]    validation_0-rmse:0.000375
## [856]    validation_0-rmse:0.000371
## [857]    validation_0-rmse:0.000368
## [858]    validation_0-rmse:0.000368
## [859]    validation_0-rmse:0.000364
## [860]    validation_0-rmse:0.000365
## [861]    validation_0-rmse:0.000365
## [862]    validation_0-rmse:0.000361
## [863]    validation_0-rmse:0.000358
## [864]    validation_0-rmse:0.000355
## [865]    validation_0-rmse:0.000351
## [866]    validation_0-rmse:0.000351
## [867]    validation_0-rmse:0.000348
## [868]    validation_0-rmse:0.000345
## [869]    validation_0-rmse:0.000345
## [870]    validation_0-rmse:0.000342
## [871]    validation_0-rmse:0.000339
## [872]    validation_0-rmse:0.000336
## [873]    validation_0-rmse:0.000334
## [874]    validation_0-rmse:0.000333
## [875]    validation_0-rmse:0.000331
## [876]    validation_0-rmse:0.00033
## [877]    validation_0-rmse:0.000329
## [878]    validation_0-rmse:0.000327
## [879]    validation_0-rmse:0.000326
## [880]    validation_0-rmse:0.000324
## [881]    validation_0-rmse:0.000323
## [882]    validation_0-rmse:0.000322
## [883]    validation_0-rmse:0.000321
## [884]    validation_0-rmse:0.000319
## [885]    validation_0-rmse:0.000318
## [886]    validation_0-rmse:0.000317
## [887]    validation_0-rmse:0.000316
## [888]    validation_0-rmse:0.000315
## [889]    validation_0-rmse:0.000313
## [890]    validation_0-rmse:0.000312
## [891]    validation_0-rmse:0.000311
## [892]    validation_0-rmse:0.00031
## [893]    validation_0-rmse:0.000309
## [894]    validation_0-rmse:0.000308
## [895]    validation_0-rmse:0.000307
## [896]    validation_0-rmse:0.000306
## [897]    validation_0-rmse:0.000305
## [898]    validation_0-rmse:0.000304
## [899]    validation_0-rmse:0.000303
## [900]    validation_0-rmse:0.000303
## [901]    validation_0-rmse:0.000302
## [902]    validation_0-rmse:0.000302
## [903]    validation_0-rmse:0.000301
## [904]    validation_0-rmse:0.0003
## [905]    validation_0-rmse:0.000299
## [906]    validation_0-rmse:0.000299
## [907]    validation_0-rmse:0.000298
## [908]    validation_0-rmse:0.000298
## [909]    validation_0-rmse:0.000298
## [910]    validation_0-rmse:0.000297
## [911]    validation_0-rmse:0.000297
## [912]    validation_0-rmse:0.000296
## [913]    validation_0-rmse:0.000295
## [914]    validation_0-rmse:0.000294
## [915]    validation_0-rmse:0.000295
## [916]    validation_0-rmse:0.000294
## [917]    validation_0-rmse:0.000293
## [918]    validation_0-rmse:0.000293
## [919]    validation_0-rmse:0.000293
## [920]    validation_0-rmse:0.000293
## [921]    validation_0-rmse:0.000293
## [922]    validation_0-rmse:0.000292
## [923]    validation_0-rmse:0.000292
## [924]    validation_0-rmse:0.000292
## [925]    validation_0-rmse:0.000292
## [926]    validation_0-rmse:0.000291
## [927]    validation_0-rmse:0.000291
## [928]    validation_0-rmse:0.00029
## [929]    validation_0-rmse:0.000289
## [930]    validation_0-rmse:0.000289
## [931]    validation_0-rmse:0.000288
## [932]    validation_0-rmse:0.000287
## [933]    validation_0-rmse:0.000287
## [934]    validation_0-rmse:0.000287
## [935]    validation_0-rmse:0.000286
## [936]    validation_0-rmse:0.000286
## [937]    validation_0-rmse:0.000285
## [938]    validation_0-rmse:0.000286
## [939]    validation_0-rmse:0.000285
## [940]    validation_0-rmse:0.000285
## [941]    validation_0-rmse:0.000286
## [942]    validation_0-rmse:0.000286
## [943]    validation_0-rmse:0.000285
## [944]    validation_0-rmse:0.000285
## [945]    validation_0-rmse:0.000285
## [946]    validation_0-rmse:0.000284
## [947]    validation_0-rmse:0.000284
## [948]    validation_0-rmse:0.000283
## [949]    validation_0-rmse:0.000282
## [950]    validation_0-rmse:0.000282
## [951]    validation_0-rmse:0.000282
## [952]    validation_0-rmse:0.000281
## [953]    validation_0-rmse:0.000281
## [954]    validation_0-rmse:0.00028
## [955]    validation_0-rmse:0.00028
## [956]    validation_0-rmse:0.000279
## [957]    validation_0-rmse:0.000279
## [958]    validation_0-rmse:0.000279
## [959]    validation_0-rmse:0.000278
## [960]    validation_0-rmse:0.000278
## [961]    validation_0-rmse:0.000277
## [962]    validation_0-rmse:0.000277
## [963]    validation_0-rmse:0.000277
## [964]    validation_0-rmse:0.000277
## [965]    validation_0-rmse:0.000277
## [966]    validation_0-rmse:0.000278
## [967]    validation_0-rmse:0.000277
## [968]    validation_0-rmse:0.000277
## [969]    validation_0-rmse:0.000277
## [970]    validation_0-rmse:0.000277
## [971]    validation_0-rmse:0.000277
## Stopping. Best iteration:
## [961]    validation_0-rmse:0.000277
## 
## XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
##              colsample_bynode=1, colsample_bytree=0.8, gamma=0,
##              importance_type='gain', learning_rate=0.01, max_delta_step=0,
##              max_depth=10, min_child_weight=9, missing=None, n_estimators=10000,
##              n_jobs=1, nthread=8, objective='reg:logistic', random_state=0,
##              reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=4242,
##              silent=None, subsample=0.8, verbosity=1)
# Report how long the fit took, in minutes.
# time.process_time() counts CPU time of this process, not wall-clock time.
# NOTE(review): assumes start_time was also taken from time.process_time()
# before the fit — it is defined earlier in the file; confirm.
_elapsed_minutes = (time.process_time() - start_time) / 60
print(_elapsed_minutes, "Minutes")
## 4.1780074 Minutes
# Predict on the validation features with the fitted model.
preds = clf.predict(val_x)

## Turn the raw model output into one class label per row.
import numpy as np
preds = np.asarray(preds)
if preds.ndim > 1:
    # One column of scores per class: keep the best-scoring class of each row.
    best_preds = np.argmax(preds, axis=1)
else:
    # One score per row (the model printed above is an XGBRegressor with
    # objective='reg:logistic'). Taking np.argmax of each scalar would always
    # give 0, so every row would be labelled 0; threshold the score instead.
    # NOTE(review): the 0.5 cut-off assumes val_y holds 0/1 labels — confirm.
    best_preds = (preds >= 0.5).astype(int)

# Determine the precision of this prediction (unweighted mean over classes):
from sklearn.metrics import precision_score
precision_score(val_y, best_preds, average='macro')
## 0.46495489863356254
## 
## /Users/Mezhoud/anaconda3/lib/python3.7/site-packages/sklearn/metrics/_classification.py:1272: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
##   _warn_prf(average, modifier, msg_start, len(result))
#xtest = test.drop(['Target', 'weeks', 'Latitude', 'Longitude'], axis=1)
# Set the identifiers aside, then drop them so only model features remain.
test_ID = xtest.Square_ID
xtest = xtest.drop('Square_ID', axis = 1)
pred_test = clf.predict(xtest)

# Pair each ID with its prediction by position. Concatenating two frames with
# axis=1 aligns on index instead, which would scatter IDs and predictions onto
# different rows (padded with NaN) whenever xtest does not carry a 0..n-1 index.
submission = pd.DataFrame(
    {'Square_ID': test_ID.values, 'pred': pred_test},
    columns=['Square_ID', 'pred'],
)

# The row index is written as the first CSV column, as before.
submission.to_csv("pred_test.csv")
# Call head() — the bare attribute only displays the bound method.
submission.head()
## <bound method NDFrame.head of        Square_ID      pred
## 0       4e3c3896  0.000041
## 1       4e3c3897  0.000041
## 2       4e3c3898  0.000041
## 3       4e3c3899  0.000041
## 4       4e3c389a  0.000041
## ...          ...       ...
## 279917  4e6f5dfd  0.000042
## 279918  4e6f5dfe  0.000042
## 279919  4e6f5dff  0.000042
## 279920  4e6f5e00  0.000042
## 279921  4e6f5e01  0.000047
## 
## [279922 rows x 2 columns]>

7.1 Visualize prediction

# Read back the predictions stored in pred_test.csv.
pred_test <- fread("pred_test.csv")

# Rows of new_train that receive the predictions.
is_2019 <- new_train$Year == 2019

# Predictions are matched to rows purely by position, so the two sides must
# have the same number of rows; fail loudly instead of recycling or misaligning.
# NOTE(review): this also relies on pred_test.csv being in the same row order
# as the Year == 2019 rows of new_train — confirm against how xtest was built.
stopifnot(sum(is_2019, na.rm = TRUE) == nrow(pred_test))
new_train[is_2019, ]$Target <- pred_test$pred

# Density of Target over all rows of new_train (2019 rows now hold predictions).
ggplot(new_train, aes(Target)) + geom_density()

# Density of the predictions on their own.
ggplot(pred_test, aes(x = pred)) + geom_density()

# Scatter map of Target (Longitude vs Latitude), keeping for each year only the
# rows at that year's latest `Week of`; one panel per year.
new_train %>%
  #filter(Year == 2019) %>%
  group_by(Year) %>%
  filter(`Week of` == max(`Week of`)) %>%
  ungroup() %>%
  # Height is only read by the commented-out layer below; kept so that layer
  # can be switched back on without further changes.
  mutate(Height = as.factor(Height)) %>%
  ggplot() +
  #geom_point(aes(x = Longitude, y = Latitude, colour = Height)) +
  geom_point(
    #data = subset(new_train, Target == 1),
    # Shape is fixed at 16 for every point, so it is not mapped to Target here:
    # a mapped aesthetic that is also set as a constant is ignored by the layer.
    aes(x = Longitude, y = Latitude, color = Target),
    size = 0.8, stroke = 0, shape = 16
  ) +
  facet_wrap(Year ~ ., ncol = 2)

# Build the submission table from the rows whose `Week of` is on or after
# 2019-05-07: re-join the two halves of the identifier with "-" and expose
# Target under the name target_2019.
sub <- new_train %>%
  filter(`Week of` >= "2019-05-07") %>%
  mutate(Square_ID = paste(Square_ID, other, sep = "-")) %>%
  select(Square_ID, target_2019 = Target)

# Write the two-column table to disk.
fwrite(sub, "sub.csv")